{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1读取数据\n",
    "# 2数据探索与预处理\n",
    "## 2.1标签的分布情况探索与可视化\n",
    "## 2.2缺失值的补全\n",
    "## 2.3数据分箱\n",
    "# 3计算WOE与IV值\n",
    "# 4根据计算结果再探索一下数据\n",
    "# 5WOE编码\n",
    "# 6开始用逻辑回归来进行建模\n",
    "## 6.1筛选特征\n",
    "## 6.2数据集切分\n",
    "## 6.3模型训练与评估\n",
    "# 7做评分卡模型\n",
    "# 8评分卡效果测试预览"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import math\n",
    "import numpy as np\n",
    "pd.set_option('display.width', 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149995</th>\n",
       "      <td>149996</td>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>149997</td>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>149998</td>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>149999</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>150000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Unnamed: 0  SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents\n",
       "0                1                 1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0\n",
       "1                2                 0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0\n",
       "2                3                 0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0\n",
       "3                4                 0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0\n",
       "4                5                 0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0\n",
       "...            ...               ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...\n",
       "149995      149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0\n",
       "149996      149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0\n",
       "149997      149998                 0                              0.246044   58                                     0  3870.000000            NaN                               18                        0                             1                                     0                 0.0\n",
       "149998      149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0\n",
       "149999      150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0\n",
       "\n",
       "[150000 rows x 12 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取数据\n",
    "df_train=pd.read_csv('cs-training.csv')\n",
    "df_train#其实第一行完全是可以当索引的 读取数据的时候把第一行当成索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0\n",
       "149998                 0                              0.246044   58                                     0  3870.000000            NaN                               18                        0                             1                                     0                 0.0\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0\n",
       "\n",
       "[150000 rows x 11 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train=pd.read_csv('cs-training.csv',index_col='Unnamed: 0')\n",
    "df_train\n",
    "##SeriousDlqin2yrs表示90天以上逾期或更差1代表是 0代表否   \n",
    "#RevolvingUtilizationOfUnsecuredLines 除房地产和汽车贷款等无分期付款债务外，信用卡和个人信用额度的总余额除以信贷限额\n",
    "#age年龄                       NumberOfTime30-59DaysPastDueNotWorse借款人预期30-59天的次数 但在过去两年内没有更糟糕   \n",
    "#DebtRatio债务比（每月偿还的债务，赡养费，生活费除以每月的总收入）          #MonthlyIncome每月收入 \n",
    "#NumberOfOpenCreditLinesAndLoans 公开贷款(如汽车贷款或抵押贷款)和信用额度(如信用卡)的数量\n",
    "#NumberOfTimes90DaysLate 借款人逾期90天（或以上）的次数\n",
    "#NumberRealEstateLoansOrLines抵押贷款和房地产贷款的额度（包括房屋净值信贷）\n",
    "#NumberOfTime60-89DaysPastDueNotWorse借款人逾期60-89天的次数，但在过去两年没有更糟\n",
    "#NumberOfDependents除自己(配偶、子女等)以外的家庭受养人人数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2数据探索与预处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1标签的分布情况探索与可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    139974\n",
       "1     10026\n",
       "Name: SeriousDlqin2yrs, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#看一下 SeriousDlqin2yrs的分布情况 \n",
    "df_train['SeriousDlqin2yrs'].value_counts()#里面的数据分布 139974条是0  10026是1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='SeriousDlqin2yrs', ylabel='count'>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAECCAYAAADpdjDfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQw0lEQVR4nO3dfYxldX3H8fenPJjtLq2rjKtrWDfUrSkWV+LUwmaBgUItqbaR2GJaIUp1farGND5gIGqa+lAklkq1uBZhUUuybdQqRZQGkJUFzGyrEar2KQsWQl0o3elqq2b99o97tjvsAzu/uXPP7Oy8X8lmzv2ec+/5HjLcz/zO755zU1VIkjRTPzXfDUiSFhaDQ5LUxOCQJDUxOCRJTQwOSVKTo+e7gVE7/vjja/Xq1fPdhiQtKNu2bXukqsYOtO6ID47Vq1czOTk5321I0oKS5P6DrfNUlSSpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpqMJDiSrEiyZZ/aLyb5crd8TJIbk2xNcvGwNUlSf+Y8OJIsBzYBS6fVAnwIOLYrvQmYrKp1wIuTHDdkTZLUk1FcOb4buAD4m2m1VwG3AS/qHk8Al3TLW4HxIWu3TW8gyQZgA8CqVauGPBx4wduuH/o1dOTZ9sGL5rsFaV7M+Yijqqaqaueex0meCrwCuGLaZkuBB7vlKWDFkLV9e9hYVeNVNT42dsBbrUiSZqmPyfEPAO+sqh9Pq+0ClnTLy7o+hqlJknrSx5vumcAfJ7kdeH6SPwK2Aeu79WuB7UPWJEk9Gfndcavq5/csJ7m9qi5L8izgpiSnAycB9zA4/TTbmiSpJyMbcVTVxMFqVXU/cC5wJ3BOVe0epjaqY5Ak7W/evo+jqh4CNs9VTZLUDyeWJUlNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU0MDklSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1GUlwJFmRZEu3vCrJ7UluTbIxA8ckuTHJ1iQXd9vNuiZJ6s+cB0eS5cAmYGlXei3w+qo6GzgBOBl4EzBZVeuAFyc5bsiaJKknoxhx7AYuAKYAqurSqvpWt+6pwCPABLC5q20FxoesPU6SDUkmk0zu2LFjbo5KkgSMIDiqaqqqdu5bT3IBcF9VPcRgNPJgt2oKWDFkbd8eNlbVeFWNj42NzclxSZIGepkcT3Ii8FbgLV1pF7CkW17W9TFMTZLUk5G/6XZzHjcAF08biWwD1nfLa4HtQ9YkST05uod9XAKsAq5KAvBuBpPnNyU5HTgJuIfB6afZ1iRJPRnZiKOqJrqf76iqZ1TVRPfvK1V1P3AucCdwTlXtHqY2qmOQJO2vjxHHAXWT5JvnqiZJ6ocTy5KkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmIwmOJCuSbOmWj0lyY5KtSS4eRU2S1J85D44ky4FNwNKu9CZgsqrWAS9OctwIapKknoxixLEbuACY6h5PAJu75a3A+Ahqj5NkQ5LJJJM7duwY+oAkSXvNeXBU1VRV7ZxWWgo82C1PAStGUNu3h41VNV5V42NjY3NxWJKkTh+T47uAJd3ysm6fc12TJPWkjzfdbcD6bnktsH0ENUlST47uYR+bgJuSnA6cBNzD4FTTXNYkST0Z2Yijqia6n/cD5wJ3AudU1e65ro3qGCRJ++tjxEFVPcTeT0KNpCZJ6ocTy5KkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmow8
OJIsT3JTki1Jru5q1yTZmuSyadvNuiZJ6k8fI44LgU9V1enAcUneDhxVVeuAlUnWJDl/trUe+pckTTOr4EiyvmHzR4HnJHkycAKwGtjcrbsVWA9MDFE7UH8bkkwmmdyxY0dDq5KkQ5lRcCS5ZZ/S+xv28VVgDfBm4NvAk4AHu3VTwApg6RC1/VTVxqoar6rxsbGxhlYlSYdy9BOtTPI84BTgmUku6spLgf9t2Mf7gNdV1VSSPwDeC3y8W7eMQXjtApbMsiZJ6tGh3nhzgJ+PAr/dsI+fBk5OchTwy8AH2HuKaS2wHdg2RE2S1KMnHHFU1TeAbyR5TlVdP8t9vB+4FngWcBfwJ8CWJCuB84BTgRqiJknq0RMGxzRXJnk5cOyewkyDpKq+Bjx3ei3JBHAucHlV7Ry2Jknqz0yD42bgL4E5+YhSVT3G3k9HDV2TJPVnpsExVVVXjLQTSdKCMNPg+GqSG4Drge8DVNUdI+tKknTYmmlw/JjBNRi/xOCTVQUYHJK0CM00OLYzCIs9oSFJWqRaLqALg4vvzgfOGE07kqTD3YxGHFW1adrDq5N8dET9SJIOczMKjiTTRxg/wz7XZUiSFo+ZznGcxd65jR8BbxhNO5Kkw91M5zjeB/wH8BTgEeA7I+tIknRYm2lwfAJ4GvBF4JkM7j0lSVqEZnqq6oSqurBb/lKSr4yqIUnS4W2mwfFQkncC9wCnsffLlCRJi8xMT1W9jkHIvIzBN++9dmQdSZIOazMNjk8BD1TVG4DjGMx5SJIWoZkGx/I9FwFW1fuA40fXkiTpcDbTOY5/T/IO4GsMbnT4vdG1JEk6nM10xPFK4AcM5jj+B7hoVA1Jkg5vM71X1Q+Bq0bciyRpAWi5O64kSQaHJKmNwSFJatJbcCT5aJKXdMvXJNma5LJp62ddkyT1p5fgSHI68PSq+kKS84GjqmodsDLJmmFqffQvSdpr5MGR5Bjg48D2JL8JTACbu9W3AuuHrB1onxuSTCaZ3LFjx9wdjCSplxHHRcA/ApcDLwTeyN6bJE4BK4ClQ9T2U1Ubq2q8qsbHxsbm9GAkabGb6ZXjwzgF2FhVDyf5FLAOWNKtW8YgvHYNUZMk9aiPN95/AU7slseB1ew9xbQW2A5sG6ImSepRHyOOa4BPJHk5cAyDeYrPJ1kJnAecyuD7zLfMsiZJ6tHIRxxV9d9V9VtVdUZVnVZV9zMIj7uBs6pqZ1VNzbY26v4lSY/Xx4hjP1X1GHs/HTV0TZLUHyeXJUlNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU0MDklSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU16C44kK5L8Q7d8TZKtSS6btn7WNUlSf/occVwBLElyPnBUVa0DViZZM0ytx/4lSfQUHEnOBr4PPAxMAJu7VbcC64esHWh/G5JMJpncsWPH3B2IJGn0wZHkWOBdwCVdaSnwYLc8BawYsrafqtpYVeNVNT42NjZ3ByNJ6mXEcQnwkar6r+7xLmBJt7ys62GYmiSpR3288Z4DvDHJ7cDzgZew9xTTWmA7sG2ImiSpR0ePegdVdcae5S48fgPYkmQlcB5wKlBD1CRJPer1VE9VTVTVFINJ7ruBs6pq5zC1PvuXJPUw4jiQqnqMvZ+OGromSeqPk8uSpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpqMPDiS/GySLya5Jclnkxyb5JokW5NcNm27WdckSf3pY8Txu8CHqupc4GHg5cBRVbUOWJlkTZLzZ1vroX9J0jRHj3oHVfXRaQ/HgFcAV3aPbwXWA6cAm2dZ++d995lkA7ABYNWqVXNzIJIkoMc5jiSnAcuB7wIPduUp
YAWwdIjafqpqY1WNV9X42NjYHB+JJC1uvQRHkqcAVwEXA7uAJd2qZV0Pw9QkST3qY3L8WAanl95ZVfcD2xicYgJYC2wfsiZJ6tHI5ziA3wNeAFya5FLgWuDCJCuB84BTgQK2zLImSerRyEccVfXnVbW8qia6f5uACeBu4Kyq2llVU7Otjbp/SdLj9THi2E9VPcbeT0cNXZMk9cfJZUlSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTeblAkBJc+OBPzx5vlvQYWjVu7450td3xCFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKnJggyOJNck2ZrksvnuRZIWmwUXHEnOB46qqnXAyiRr5rsnSVpMUlXz3UOTJB8Gbq6qm5K8DDiuqq7dZ5sNwIbu4XOA7/Tc5pHseOCR+W5COgB/N+fWs6pq7EArFuL3cSwFHuyWp4Bn77tBVW0ENvbZ1GKRZLKqxue7D2lf/m72Z8GdqgJ2AUu65WUszGOQpAVrIb7pbgPWd8trge3z14okLT4L8VTV54AtSVYC5wGnzm87i46nAHW48nezJwtuchwgyXLgXOCOqnp4vvuRpMVkQQaHJGn+LMQ5DknSPDI4NGNesa/DWZIVSbbMdx+LgcGhGfGKfR3OunnPTQyu89KIGRyaqQlgc7d8K3s/Ei0dDnYDFzC4KFgjthA/jqv5ccgr9qX5UlVTAEnmu5VFwRGHZsor9iUB/s+vmfOKfUmAp6o0c5/DK/Yl4QWAauAV+5LA4JAkNXKOQ5LUxOCQJDUxOCRJTQwOHdGSLE3y2SRfSfLJNF4hluTKOerjuiRfTzKZ5DX7rLt9Bs9/epJLDrHNqiS3J7k1ycbWY5VmyuDQke5C4K6qOhP4IdD0ndRV9ZY57OX3gRcB707yvMY+Hq6qDxxis9cCr6+qs4ETgJNn16b0xAwOHekeBF6aZE1VvRq4L8lfJ7kjyUf2bNT9pf7BJF+a/uTpo4EkT0pyQzd6+XSSY5O8J8lEt/6V3b8lSW7s9vGZJP9/vVRVPQr8LXDGgZpN8uwkd3Wjhm1JVnf11Umum7bddUnelWRLd8fiJVV1aVV9q9vkqcAjSTYlOa17zrVJTu1e69Pd42u7dU9LcluSryb52Cz/W2uRMDh0RKuqLwB/AnwmyYeB1wP3VtUZwDOm/eV/KoORyYue4OVe0z33TOCfgIsPst1JwE+6fWxkcIuW6R4FnnyQ574deC+D62We9ETHBiyrqtOBrwOn7CkmuQC4r6oeAq4HfifJscBJVXV3t9lLgI9V1au6x6cD36yq9cDfJfG9QQflL4eOaN3t328Gng+MMThd9NJuJHEi8Mxu03ur6jOHeLmTgHu65XuAX9hn/Z57ef09cG+SLzN4g/7BPts9BfjPg+xjFYM3/d3ANw/Rz6bu5/eAYwGSnAi8FXhLt+424DTg14HPT3vul6eFCMAXgaOS3AI8r6p+coh9axEzOHSkezXw0u6N+F7gauDKqpoALgMe6LbbNYPXuo+9t1o5tXv8I+C4rvZr3c+1wJ1V9avAcgZ/zQOQ5MkMbtly60H28W/AyUmO6l7niXx/+oPuyv4bgIuraidAFwC3AFcAn5y2+b7Hexrwyao6Fzg7yc8dYt9axAwOHen+FHhlN8J4IfBnwHlJ7gBeB3y34bX+Anhu99w1wHUM/op/W5KrGZyCgsENIN+cZCvwdGCyq1/FYPTzjqr69kH28UHgEuBLzCzMpruEwYjlqm7O5syu/lfAA1X1wMGfyr8Clye5i8EI5v7GfWsR8ZYj0mGqmwx/T1VtH+I1fgW4HLi0qm6eo9a0yBkckqQmnqqSJDUxOCRJTQwOSVITg0OS1MTgkCQ1+T88Tjafg9W2bQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#做一下可视化 \n",
    "import seaborn as sns\n",
    "sns.countplot(x='SeriousDlqin2yrs',data=df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.06684"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#看一下违约的比例\n",
    "df_train['SeriousDlqin2yrs'].sum()/len(df_train)#这个就是违约的比例"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2缺失值的补全"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                            0\n",
       "RevolvingUtilizationOfUnsecuredLines        0\n",
       "age                                         0\n",
       "NumberOfTime30-59DaysPastDueNotWorse        0\n",
       "DebtRatio                                   0\n",
       "MonthlyIncome                           29731\n",
       "NumberOfOpenCreditLinesAndLoans             0\n",
       "NumberOfTimes90DaysLate                     0\n",
       "NumberRealEstateLoansOrLines                0\n",
       "NumberOfTime60-89DaysPastDueNotWorse        0\n",
       "NumberOfDependents                       3924\n",
       "dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#看看里面数据里面是否有缺失值\n",
    "df_train.isna().sum()#可以看到 MonthlyIncome 有29731个缺失值      NumberOfDependents 有3924个缺失值   总数据有15万个  这个时候需要填补"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                           0.066840\n",
       "RevolvingUtilizationOfUnsecuredLines       6.048438\n",
       "age                                       52.295207\n",
       "NumberOfTime30-59DaysPastDueNotWorse       0.421033\n",
       "DebtRatio                                353.005076\n",
       "MonthlyIncome                           6670.221237\n",
       "NumberOfOpenCreditLinesAndLoans            8.452760\n",
       "NumberOfTimes90DaysLate                    0.265973\n",
       "NumberRealEstateLoansOrLines               1.018240\n",
       "NumberOfTime60-89DaysPastDueNotWorse       0.240387\n",
       "NumberOfDependents                         0.757222\n",
       "dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.mean()#看一下数据的平均数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                           0.000000\n",
       "RevolvingUtilizationOfUnsecuredLines       0.154181\n",
       "age                                       52.000000\n",
       "NumberOfTime30-59DaysPastDueNotWorse       0.000000\n",
       "DebtRatio                                  0.366508\n",
       "MonthlyIncome                           5400.000000\n",
       "NumberOfOpenCreditLinesAndLoans            8.000000\n",
       "NumberOfTimes90DaysLate                    0.000000\n",
       "NumberRealEstateLoansOrLines               1.000000\n",
       "NumberOfTime60-89DaysPastDueNotWorse       0.000000\n",
       "NumberOfDependents                         0.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise median. Compared with the means, several columns are heavily\n",
    "# skewed (e.g. DebtRatio: mean 353.0 vs median 0.37), so the median is the\n",
    "# value chosen to fill the missing entries.\n",
    "df_train.median(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                        0\n",
       "RevolvingUtilizationOfUnsecuredLines    0\n",
       "age                                     0\n",
       "NumberOfTime30-59DaysPastDueNotWorse    0\n",
       "DebtRatio                               0\n",
       "MonthlyIncome                           0\n",
       "NumberOfOpenCreditLinesAndLoans         0\n",
       "NumberOfTimes90DaysLate                 0\n",
       "NumberRealEstateLoansOrLines            0\n",
       "NumberOfTime60-89DaysPastDueNotWorse    0\n",
       "NumberOfDependents                      0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Impute the two columns that contain missing values with their own median.\n",
    "for column in ['MonthlyIncome', 'NumberOfDependents']:\n",
    "    df_train[column] = df_train[column].fillna(df_train[column].median())\n",
    "# Re-count the missing values to confirm that none are left.\n",
    "df_train.isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3数据分箱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binning plan for the following cells\n",
    "# age: 6 bins with edges [-math.inf, 25, 40, 50, 60, 70, math.inf]\n",
    "# NumberOfDependents: 6 bins with edges [-math.inf, 2, 4, 6, 8, 10, math.inf]\n",
    "# The three past-due counters (NumberOfTime30-59DaysPastDueNotWorse,\n",
    "# NumberOfTime60-89DaysPastDueNotWorse, NumberOfTimes90DaysLate): 10 bins with edges\n",
    "# [-math.inf, 1, 2, 3, 4, 5, 6, 7, 8, 9, math.inf]\n",
    "# All remaining features (RevolvingUtilizationOfUnsecuredLines, DebtRatio, MonthlyIncome,\n",
    "# NumberOfOpenCreditLinesAndLoans, NumberRealEstateLoansOrLines): 5 bins each"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49     3837\n",
       "48     3806\n",
       "50     3753\n",
       "63     3719\n",
       "47     3719\n",
       "       ... \n",
       "101       3\n",
       "109       2\n",
       "107       1\n",
       "105       1\n",
       "0         1\n",
       "Name: age, Length: 86, dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# age will be cut into 6 bins: [-math.inf, 25, 40, 50, 60, 70, math.inf].\n",
    "# Before binning, look at how often each age occurs (most frequent first).\n",
    "df_train['age'].value_counts(sort=True, ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "21     183\n",
       "22     434\n",
       "23     641\n",
       "24     816\n",
       "      ... \n",
       "102      3\n",
       "103      3\n",
       "105      1\n",
       "107      1\n",
       "109      2\n",
       "Name: age, Length: 86, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The same counts, ordered by age (ascending) instead of by frequency.\n",
    "df_train['age'].value_counts().sort_index(ascending=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
      "  FutureWarning\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABJMAAAHtCAYAAACtYz+SAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7/UlEQVR4nO3de7xtZVkv8N8jWwvBuzsUEylvmSma20uISiYKKiiX0tK8H7ygmR4LTdMsL2l2Me+koalpxMULoqQpB4o0IdH0RCdTMBWTOgrq8Yrv+WOMDZO551zj3Wzm3ovN9/v5rM+e813vM8czx3rWmHM/6x1jVmstAAAAANDjGjs6AQAAAACuOjSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAGA7qKqau79hR+UCALAtNJMAALZCVf1Ix5xdqmqXmfvXSHJ+VV13ZtrLquqoVeS4IJ+fGv/dvap+uTPmdVX1oIk5r6yqx1TVNapqr6q6SVU9oqpOHG//eFVdZ5xbs/sEALjq0kwCAK5UVXVeVe093j64qj5fVRdW1cO2Yw4Pq6pXbsX8U6vqs1V1zvjv/x1vn1NV36yq289MP3vme+dU1dfG53jpWJJPJjlkJuZWST7XWrt4ZuxeSU7ZlufZ+dxunOTMqvrxJN9O8ttVdceOmF9K8o8TD39xku8muXaS3x6/fjHJ7ZI8b7x/p3Hu85K8Ynz8R4w1cdbM15er6veuyHMEALYvy6sBgFX63SSPSvJ3Sa4/Nbmqfqe19jvbutHW2l8l+autCPl+kie01k6rqvsleWRr7TFjTqeN39/82D8zk29laBzdt7X2X4seuKqen2EfXG9sNH0gyZ8luW2SD45nv+2R5IDW2ke3Iudez0nyltbaF8d8npvkL6pq39ba/5vJ8wtJvpWhObRnkh/M5LfZLkne2lp7eVW9IMmPJblpkr2T7J5kr/HfGyS5Q5LrJnnWGPv6DI240zPsz3e31p4ws/3fSXLJlfrMAYCV0EwCAFbpBkn+o7XWknytY/4LkvzOSjNa7HtJ3lBV30hynSQ3qqqzxu/dNskPN0+sqlsleed495pJbpnkA3NNl3e01v5wvH2TJM9qrb1rbFT9aobm0m+01o4ZH/OvMzRxrlRV9XNJfjnJpQ2w1tpJVfWQJO+vqge31r4xju81xvxkkg8nuXOSRyc5s7W2aIXSYRlWVl2U5D5Jfj/DftyU5MAkv5ekktwmydmttQur6vHjnI1LUm7b9owBgO3BaW4AwDYZr5fzuqq6oKrenqHB8uCq+kqSmyf5eFV9paquvcZjvGycn3HuZ2a+9+aqenJV/XlV/dvM+EPHU9K+XFWvmnu8x1TVm2fu7z2efvfM8fE/VVU3mQm5RpInttY2JTkqycmttU3j/bNz+fdMuyT55vi9f0myz3j7m0n2T/InGRpIm/0wl3e9JI/JZQ2pjPvsu2s9r6rav6o+V1XnVtVbquoD4/jdxtPEvlRVx4yrpVJVN0vyjiRPbq3937kcnpDkC0k+XVUHzWzjWknemuSZrbWvZVhddKO52M3zfjjmfF6SZ2Y4Le7hGZpr/zbe/uUMjaUkSWvtg621/zXuw0W8NwWAqwAv2ADAtjoiyc8m+YkkJ2Y4Rerk1tpNkvxHkru21m4ye0rVvNba0eP8jHNvPzflOUn+PsndZ8aemOH6PHsl2b+qfnoizz0zNHn2zNBImb0Q9Y8keeN4Gtrrkxwyc/2jTRmaPZvNn4r1zqr6uwwreZbNmfX1DI2Xl8w0tK6Vy1YmLXteL0vy5CRPT3LL1tqBY1Pn7RlWEO2V4Wfw0HH+MUn+IMkrxusTfbeqvlhVX0xyYYYVQn+aYSVWquqa42PdI8lLx+f+0CSvHvfFF6rqFeNj75GZZlhr7V8y/KzvMu6vzV/faa29ePbJV9XPJvlGkrtX1TfGRth3kzx4zAsAWOc0kwCAbbVvkuNba99prZ2QoVly
ZTultfamuRU2j8twEet3JLl1huv3rKUl+Z3W2g8zrDa63qXfaO2g1tqtWmt3SvKkJO9prd1p/Nq9tfaZxQ+ZJHl4a22/JJ9YY84fzzSq0lr7WIaVTE8fv/+jSb4z8by+k6HpdK1ctrLnthmuV/S3Sb6UoZmzufl0aGvtNa21W2doNp3SWvvx1tqPZ2hMnd9a+8PW2nFVdaMMp6z9MMnfJHnYuC9OSnLUePv5GRterbX/aK29bO457p3k+CTPHr/ekGGV0qWq6j5J3pLk/UkeP+Z9jyQXjCvBXrfGPgQA1gnXTAIAtlXl8te6mT+t68pwuQtTV9X1k5yVYeXNi7P8GjyzvjKzOupy1+apqs2NoEuy5TWTrp2hCXXcksd9Z1V9J8MpYcs8Y+aaSY8cx/4kw6esPXfcxsUTz+szSV6a4f3b4zennuSzrbXbjc/jRzM2mlpr35uJvU+Sj83cv1ku3/z6f0nelaHZ9e41nsda1zT6QYbm0Q/G+zednV9V10jyRxlOoWvjiqvz13g8AGCd0kwCALbVPyY5ary+z4FJbrgNj/XfVXWLJF9Ocu3W2kVL5t0qQ+PntUn2ybAiZ8rSRkhr7dJT1OY/za3Dw1trnx0/9a1ba+0rVXWH1toPq+o6GVYq3SULnldV7ZLkkCS3mTtd8Nwk166qe2U4DfCtGT4575Uzz+dmGa7RdMeZuD2TvHcml28nec04f4uV6+P298hljaJl9s9lp/tdO8nnZr73xAwrkD44XtfpKUl+fW47N0pSyz4ZDwBYH5zmBgBsq3dmuBD1FzKcIvaVbXis38zQFLkgl29+zPtkknMynNr1u0n+OcOnhm218bpDU3M2VNXmP8JVkruMK5fun+Sk8fZdkpw+5jP7HuuaufxpbrPXX/p+Vf1MkkvGT7xb+Lxaa5eM418Yr1304bER9b0kD0vyqjHmO0kuPVWsqm6X4ZPZnt9a+8+q2rWqbj7mOtvomfUjM7c3ZFjpdGySZ2Q4BW7WtWaea2VYgbXfeNrfw2fy+JEMP9v/Oe7vP0vyxdbamRmafJt/Bj+f5G1L8gIA1gkrkwCAbTI2Oh6/5Ht7b+Vj/XmSP58be8yCed/PsFJn2eO8OcmbZ+6fl+GaPpvv/87M9H+tqpbhgtSXqqpzZ+7ukuHUs2MyNIPObq3tv2jbVfXIDBef3uzEJH8wrl66SYbrIG1eAfSGDM23w9d6XlW1+SLgGzM0bZ6V4dpKz2itfTTJnRbEHJFhtdEzW2tvH4f3SXJchmtcfXZR/kkOSvL98fY1MzR6Hj/mNu+auayZNN+Ue0mGVWtprX23qm6f5LpJ/j1Dw/AR4/cuqapPj8223cbnBgCsYzX8EQwAYPWq6uNJbr7gW7dorX13wThJqup6GVbsbMpwGtx5SZ7QWjt3jZhKcr3W2te3R469quq2rbV/3dF5AABXnGYSAAAAAN1cMwkAAACAbppJAAAAAHS7Sl+A+8Y3vnHbe++9d3QaAAAAADuNs88++79aaxuXfX+lzaSq2iPJB1prd66qNyW5XZJTWmsvGr/fNbbM3nvvnbPOOmuVTwEAAADgaqWqzl/r+6s+ze0VSXatqsOS7NJa2zfJnlV1696xFecHAAAAwFZYWTOpqu6b5FtJvpJk/yTHjd/6cJL9tmIMAAAAgHViJc2kqrpWkucnefY4tFuSL423L06yx1aMzT/2kVV1VlWddeGFF64ifQAAAACWWNXKpGcneU1r7evj/W8m2XW8vfu43d6xy2mtHdNa29Ra27Rx49JrQQEAAACwAqtqJt0vyVFVdVqSOyU5OJedsrZPkvOSnN05BgAAAMA6sZJPc2ut3Xvz7bGhdEiSM6pqzyQHJblHktY5BgAAAMA6sepPc0trbf/W2sUZLq790SQ/31q7qHds1fkBAAAA0G8lK5MWaa19LZd9UttWjQEAAACwPqx8ZRIAAAAAOw/NJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAA
AKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQbcOOTgAAlnnEuw7smvf2h35gxZkAAACbWZkEAAAAQDfNJAAAAAC6aSYBAAAA0M01kwDYLo46se/6R685zPWPAABgPbMyCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3VyAG4CdykHveWjXvPcf8q6V5gEAADsrK5MAAAAA6KaZBAAAAEA3zSQAAAAAurlmEsDV3AuOO7B77gt/6QNJkt88vi/m5Ud84ArlBDuzBx//9q55Jx/xiBVnAgBwxViZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN027OgEAODq4oHv+q2ueac89CUrzgQAAK44K5MAAAAA6GZlEgBXewe9+wld897/kDeuOBMAAFj/rEwCAAAAoJuVSQBwBRz0rmd0zXv/Q/94xZkAAMD2ZWUSAAAAAN2sTAIA8qATX9E1732HPWvFmQAAsN5ZmQQAAABANyuTAACuoAcf/xdd804+4lErzgQAYPvRTAKAdeyBJ72wa94ph75gxZkAAMDAaW4AAAAAdNNMAgAAAKDbyppJVXXDqjqgqm68qm0AAAAAsH2t5JpJVXXTJCcmOTnJH1XVfZOcneRz45Sntdb+uapemOSBST7WWnvqGLvFGADAqj34hGO75p18+GNXnAkAwPq2qpVJt0/yjNbai5OcmuRxSd7RWtt//PrnqtqUZL8kd0vyxaq636KxFeUHAAAAwBWwkpVJrbUPJUlV3TtDY+j4JIdW1T2TnJ/k0UnuneSE1lqrqg8lOTjJRQvGPrSKHAF2Ri955wO65/7Ww09dYSYAAMDOapXXTKokD0vy/SSfTHKf1tp+Sb6e4TS23ZJ8aZx+cZI9lozNP+6RVXVWVZ114YUXrip9AAAAABZYWTOpDY5KcmaSm7TWLhi/dW6SWyf5ZpJdx7Hdx1wWjc0/7jGttU2ttU0bN25cVfoAAAAALLCSZlJVHV1VjxrvXj/J66tqn6raJcmhGVYqnZ3h+khJsk+S85aMAQAAALBOrOSaSUmOSXJcVT0hyaczXB/p7UkqyXtaax+qqmskeWlVvTLJgePX+QvGAAAAAFgnVnUB7q8lOWBu+I5zc344flrbg5K8srX2+SRZNAYAAADA+rCqlUldWmvfzvBJb2uOAQAAALA+7NBmEgBw9fKgE1/TNe99hx214kwAALiiVvZpbgAAAADsfDSTAAAAAOjmNDcA2Mk88KSXds075dDnrDgTAAB2RlYmAQAAANBNMwkAAACAbppJAAAAAHRzzSQA4Ap50Imv7Jr3vsOevuJMAADYnqxMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB027CjEwBgsVe84wHdc5/1y6euMBMAAIDLaCYBAOvag054fde89x3+pBVnAgBA4jQ3AAAAALaCZhIAAAAA3ZzmBgDsdB50whu75r3v8CesOBMAgJ2PlUkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOjmAtwA28Gfvv0B3XN/7RGnrjATAACAbWNlEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACg24YdnQDAjvRnf/GA7rn/41GnrjATAACAqwYrkwAAAADoppkEAAAAQDenuQE7jWPfcv/uuY999N+sMBMAAICdl5VJAAAAAHSzMglgK732bf0X7X7KI120GwAA2LlYmQQAAABANyuTAADWuQcf/86ueScf8fAVZwIAYGUSAAAAAFtBMwkAAACAbitrJlXVDavqgKq68aq2AQAAAMD2tZJmUlXdNMn7ktwtyUeqamNVvamqzqyq583M6xoDAAAAYH1Y1cqk2yd5RmvtxUlOTXLfJLu01vZNsmdV3bqqDusZ
W1F+AAAAAFwBK/k0t9bah5Kkqu6dYXXSDZMcN377w0n2S3LnzrF/W0WOAAAAAGy9VV4zqZI8LMn3k1SSL43fujjJHkl26xybf9wjq+qsqjrrwgsvXFX6AAAAACywkpVJSdJaa0mOqqrfS3JEkl3Hb+2eoYn1zc6x+cc9JskxSbJp06a2qvyBHettb35A99xHPubUFWYCAADArFVdgPvoqnrUePf6SX4/wylrSbJPkvOSnN05BgAAAMA6saqVScckOa6qnpDk00neleT0qtozyUFJ7pGkJTmjYwwAAACAdWJVF+D+WpIDZseqav9x7OWttYu2ZgwAAACA9WFl10yaNzaYjrsiYwAAAACsDyv7NDcAAAAAdj6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbht2dALAzu+vjj2we+7DHvuBFWYCAADAtrIyCQAAAIBumkkAAAAAdNNMAgAAAKCbayYBAOyEHnz8X3fNO/mIX1xxJgDAzsbKJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOi2YRUPWlXXS/LO8fG/meRhST6b5HPjlKe11v65ql6Y5IFJPtZae+oYu8UYsH6ccOyB3XMPf+wHVpgJAAAAO8KqViY9IskftdYOSPKVJM9O8o7W2v7j1z9X1aYk+yW5W5IvVtX9Fo2tKD8AAAAAroCVrExqrb125u7GJP+R5NCqumeS85M8Osm9k5zQWmtV9aEkBye5aMHYh1aRIwAA2+6Q49/TNe89Rxyy4kwAgO1lJc2kzarq55LcIMkHkxzbWrugql6T4TS23ZL8+zj14iR7JPnBgrH5xzwyyZFJstdee60yfQCAq5WDjz+xa957jzhsxZkAAOvZyi7AXVU3TPKqJI9L8qnW2gXjt85NcusM11LadRzbfcxl0djltNaOaa1taq1t2rhx46rSBwAAAGCBlTSTqupaSY5L8pzW2vlJ3lpV+1TVLkkOTfLJJGdnuD5SkuyT5LwlYwAAAACsE6s6ze3xSe6S5LlV9dwkH0ny1iSV5D2ttQ9V1TWSvLSqXpnkwPHr/AVjAAAAAKwTq7oA9+uSvG5u+IVzc344flrbg5K8srX2+SRZNAYAAADA+rDSC3BPaa19O8nxU2MAAAAArA8ruwA3AAAAADsfzSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN027OgEgB3nPX9+UPfcQx73/hVmAgAAwFWFlUkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEC3DTs6AeDKccqbHtg994GPP2WFmQAAALAz00yCFTvtzx7UPXf///G+FWYCAOvDIcf3vd6954j+11AAYPtxmhsAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoNuGHZ0AsKUPvvGB3XMPeMIpK8wEAAAALs/KJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOjW3UyqqgcsGLvLlZsOAAAAAOvZhrW+WVV7JrkkyfeTPL2qzkuyS5KLk9wlyVOT
HLDiHAEAAABYJ9ZsJiU5K0lL8pcZGkgvT/KzSY5Ncp8kF600OwAAAADWlalm0r9maCadm+RmSU5Pct0k/5Xkh6tNDdafvz/mwd1z73nkySvMBAAAAHaMqWbSZi1JJbl1kh/LcIrbHkmuvaK8YOX+8Q0Hd8+92xPfu8JMAAAA4Krjin6aW5v5AgAAAOBqYmubSf8nyX8m+ackXx2/tlBV16uq91fVB6vqpKq6VlW9qarOrKrnzczrGgMAAABgfZhqJv1UktsluX2G09zuk+Q2SW6c4VPdlnlEkj9qrR2Q5CtJHp5kl9bavkn2rKpbV9VhPWPb8uQAAAAAuHJNXTPpLkm+l+SSJO9I8usZGlDfSPK5JI9cFNRae+3M3Y3jvD8Z7384yX5J7pzkuI6xf5t97Ko6MsmRSbLXXntNpA8AAADAlWnNZlJr7ctJUlWbkry9tXbuzLffXFWfXCu+qn4uyQ2SnJfkS+PwxUlulWS3zrH5nI5JckySbNq0yTWbAAAAALajyWsmVVUleXeSO1fVU6vqruP4XZK8do24GyZ5VZLHJflmkl3Hb+0+brd3DAAAAIB1Ys1mTVVVa60l+WySN2S4APdBVXV2klcn+cUlcdfKcLrac1pr5yc5O8Mpa0myT4aVSr1jAAAAAKwTU9dMen9VfTPJ9TKccnaXJJuSvDfDRbk3JvnigrjHj3OfW1XPTXJskl+tqj2THJTkHklakjM6xgAAAABYJ6aaSUck2TvJUUl+N8kFSR7SWrukqvZO8udV9Qvj6qVLtdZel+R1s2NV9Z4kByR5eWvtonFs/54xAACu3h5y/Kld8959xANWnAkAMNVMekqGFUn/neRTGT6R7fNV9fYkP5Pk6PlG0jKtta/lsk9q26oxAAAAANaHqWbSdcd/983wKWsbknwyyTlJHpzkMyvLDLbCJ15/cPfcOz/pvSvMBAAAAHZuU5+W9sEkn09y5yTnJjk0w4WxD0zy8iS/tdLsAAAAAFhXpppJ903yvQyf3HbbJG/N8MluH22tvTXJLatq6jEAAAAA2EmseZpba+0FVbVbhk9zu2Sc/7zW2pnjlCe31n644hwBAAAAWCemrpmU1tq3knxrZuhLM9/7+gpyAgAAAGCdcooaAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQbcOOTgAAAFbhocf/bde8dx3xCyvOBAB2LlYmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQLcNOzoBWOQzrz2ka97tn/KeFWcCAAAAzLIyCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3VyAGwAAtsFhJ/x917wTD7/nijMBgO1DMwkAAEaHnvC/uuaddPh9VpwJAKxfTnMDAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTbsKMTYOf32Vc/pGverZ767hVnAgAAAGwrK5MAAAAA6KaZBAAAAEA3zSQAAAAAuq3smklVtUeS41tr96qqmyX5WJLPjt/+xdbahVX1piS3S3JKa+1FY9wWYwAAsLM4/IR/7J57wuF3W2EmAHDFrGRlUlXdIMlbkuw2Dt09yYtba/uPXxdW1WFJdmmt7Ztkz6q69aKxVeQHAAAAwBWzqtPcLknysCQXj/fvkeQpVfUPVfXH49j+SY4bb384yX5Lxi6nqo6sqrOq6qwLL7xwNdkDAAAAsNBKmkmttYtbaxfNDL0/yb6ttZ9LcpuqumOGVUtfGr9/cZI9lozNP/YxrbVNrbVNGzduXEX6AAAAACyxsmsmzTmztfbd8fa5SW6d5JtJdh3Hds/Q2Fo0BgAAAMA6sb2aNadW1U2r6tpJHpDk00nOzmWn
se2T5LwlYwAAAACsE9trZdILk3wkyfeSvL619q9VdUGSM6pqzyQHZbiuUlswBgAAAMA6sdJmUmtt//HfjyT5qbnvXVxV+yc5IMnLN19jadEYAAAAAOvD9lqZtFBr7Wu57NPblo4BAMDV2REnnNM99/jD77SyPAAgcYFrAAAAALaCZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6LZhRyfAVcsXXvXwrnl7Pe2dK84EAAAA2BGsTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKDbhh2dAAAAcOX7pRP+d/fc4w7/6RVmAsDOxsokAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtPc7sa+9JrntY172ZHvWrFmQAAAABXFZpJAABAkuRhJ/5799y/OuyWK8wEgPXMaW4AAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG4bdnQCAADAVdcTT/xC99w3HLbXCjMBYHuxMgkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN1cgBsAANiunnPSl7rnvvTQm60wEwCuCCuTAAAAAOimmQQAAABAt5U1k6pqj6o6Y7x9zao6uarOrKrHbc0YAAAAAOvHSppJVXWDJG9Jsts49LQkZ7XW9k3y4Kq6zlaMAQAAALBOrGpl0iVJHpbk4vH+/kmOG2+fmWTTVoxdTlUdWVVnVdVZF1544QpSBwAAAGCZlTSTWmsXt9YumhnaLcnmj2y4OMkeWzE2/9jHtNY2tdY2bdy4cRXpAwAAALDEhu20nW8m2TXJRUl2H+/3jgEAAFdzLz3pgu65zzn0pivMBIDt9WluZyfZb7y9T5LztmIMAAAAgHVie61MekuSU6rqXkl+OsnHMpzO1jMGAAAAwDqx0mZSa23/8d/zq+qADKuOnt9auyRJ7xgdLnjt87rm3fQpL1pxJgAAAMDObHutTEpr7cu57JPatmoMAAAAgPVhe10zCQAAAICdgGYSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBuG3Z0AgAAAKvwqpP+s3vu0w7dY4WZAOxcrEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAuvk0t3XoP1/30q55ezz5OSvOBAAAAODyrEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3F+AGAAAYvenEr3bPffxhP7bCTADWLyuTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbht2dAI7u6++/pVd837sSU9fcSYAAMAq/OUJF3bP/ZXDN64wE4Dtw8okAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTbLs2kqtpQVV+oqtPGrztU1Qur6uNV9eqZeVuMAQAAALB+bNhO27ljkne01o5OkqralGS/JHdLcnRV3S/J1+fHWmsf2k75AQAAbDcnHv9f3XMPO+LGK8wEYOttr2bSPZIcWlX3THJ+kk8mOaG11qrqQ0kOTnLRgrEtmklVdWSSI5Nkr7322k7pAwAAAJBsv2smfTzJfVpr+2VYgbRrki+N37s4yR5JdlswtoXW2jGttU2ttU0bN25cadIAAAAAXN72Wpn0qdbad8fb5ya5VoaGUpLsnqGp9c0FYwAAAFxBf/OO/tPp7v/LTqcD
+myvZtJbq+rFST6d5NAkp2W4PtI7k+yT5LwkZyf5pbkxAAAAkpx8XH9j6MG/pDEErM72aib9bpK/TFJJ3pPkRUnOqKpXJjlw/Do/yUvnxgAAAABYR7ZLM6m19ukMn+h2qfET3B6U5JWttc8vGwMAAABg/dheK5O20Fr7dpLjp8YAAAAAWD9c5BoAAACAbjtsZdJV0YWvf0PXvI1PeuKKMwEAAADYMaxMAgAAAKCblUkAAABc6rS3X9g9d/9HbFxhJsB6ZWUSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0M2nuQEAALBN/v4v+j8B7p6P8glwcFVnZRIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALq5ADcAAADb3ceP/Wr33Ls+9sdWmAmwtaxMAgAAAKCbZhIAAAAA3TSTAAAAAOjmmkkAAABcJZzzZ/3XWbrT/3CdJVgVK5MAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACg24YdnQAAAACsyv9+/X92zfvpJ+2x4kxg52FlEgAAAADdrrYrky58/bFd8zY+6bErzgQAAADgqsPKJAAAAAC6aSYBAAAA0O1qe5obAAAALPLZV/VdtPtWT3PRbq6erEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAuvk0NwAAANhGX/ijr3TN2+uZN1lxJrB6mkkAAACwA1zwsi93zbvp0XuuOBPYOk5zAwAAAKCbZhIAAAAA3TSTAAAAAOjmmkkAAABwFfGVPziva95NfmPvlebB1ZtmEgAAAOzEvvKH/6dr3k3+521WnAk7C6e5AQAAANBtXa5Mqqo3JbldklNaay/a0fkAAADA1clX/ujTXfNu8syfWXEmrEfrrplUVYcl2aW1tm9Vvbaqbt1a+7cdnRcAAACw3H/+8Se65u3xjDtv23ZeeWbfdp6+7zZth+Wqtbajc7icqvrTJB9orZ1SVUckuU5r7diZ7x+Z5Mjx7m2T/OuSh7pxkv/ays2LESNmfces17zEiBEjRowYMWJ2xDbEiBEjZlUxt2itbVwa1VpbV19J3pRkn/H2/ZM8+wo+zllixIjZuWLWa15ixIgRI0aMGDFXlbzEiBEjZltjWmvr8gLc30yy63h797hIOAAAAMC6sR4bNWcn2W+8vU+S83ZcKgAAAADMWncX4E7yriRnVNWeSQ5Kco8r+DjHiBEjZqeLWa95iREjRowYMWLE7IhtiBEjRsyOiFl/F+BOkqq6QZIDkpzeWvvKjs4HAAAAgMG6bCYBAAAAsD6tx2sm7fSq6oZVdUBV3XhH58IV+3lcGTE9j7Gjcruqx1yV7OzPj2lVtVtV/UJV/fiOzoUdx7GARB0wUAeoAa4KrrbNpKq6XlW9v6o+WFUnVdW1qmqPqjpjK2K2eIyOmFskeV+SuyX5SFVt7MltHN+jqj6xFc/nC1V12vh1h63Yzmur6uDO7Tx9ZhvnVNUbOmL2qKpTquqMqnp953ZuW1XvG2P+cFHMGLdVB96qumnmfh4ddTAfc4uOOpiPufn8dntyG8fXqoNFz2eqDpZtZ606mI/57Y46WJTbVB3Mx9y1pw6uqNl9O1UH8zE9x4MFMQv3/Vp5Lbo/sY0NUzWwxnaW1sCC7Tx5qgYWxNxgqgYWxPzEKmpg0X6qqmuvtZ+XxLyzqk6tqg/XcMr2VMw+ST6Q5OeSvLeqbt+znXF816r6963I7RMz9w/Yiu38ZlU9rXM7s8eCj1bVqT37oKqOG2+fWFXX7Ii5T1X99Xj7LxbFbIvZ2t+KY8Frq+rgrTwWbI7pOhbM5zaT39TxYPN2tuZ4ML+dnuPB5u1szfFgc8zWHA82x6zqeLBF/lN1sCDm2Kk6WLKfpt4bLNy3a9XBgpg3TdXBGttZ673BfMx3p+pgyb6eem8wH3Pqiupgi5rsqIPLxfQcDxbETB4Plv2+TNTB/HYmjwdr
bGetOpjfzuTxYEHMwu1OxPzElV0Hix6zowYuF9NZA/MxPTWw8PlO1MD8dnpqYNl21qqB+e301MB8TE8NzMdc6TWwKlV106q6X1VdZ0fnsk1aazvdV5I3JTkzyfPWmPOUJAeMt1+X5NEZ3tD/01bE/Nrc/UM6Yg5Jco/x/iuSPKAnZrz91iTndub2/CQvm9hPi3K7V5ITtyZm5nuvSnKXzv32K+P9tyfZ1BHzf2f2218l2X9BzE2T/EOS5yb55yQbp2ohyf3mfh4P66iD+Zind9TBfMwDOupgi5iOOpiPObqjDhblNlUHC3ObqINF+22qDuZjvjZVBzOxeyT5xHh78pgwu2+T3GCqDhbELP3dWCNm6X5cNH/Z/Ylt/OxUDSx63KkaWCuXZTWwILfJY8GCmOOmaiDDh0t8Iclp49cdkrwwyceTvHrJ419uPyXZJcnJSc5bI6f5mEOSPHa8fXSSX+uI2SfJg8fbT03yxKmYmfEXJ/l2Z243SvLOiZ/lFttJcqskf5dkl96Yme/9RpLDOvfbb808pyM6Yl6e5OHj7Zclecwaz+u1SQ4eb/e8P7i09tN5LJiL6ToWzMX0Hgu2+L3MxPFgbjtdx4P57Szabk9u4/jS48Fcbl3Hg7mYnuPBk3PZseCcJG/oqYP5/HvqYC7muT11MBdzdE8dLNq3U3Ww4Pl0vS7MxXS9Liz6ua9VB3NzXtBTB3MxH+yogxskOSXJGUleP45NvUecr8m7TtXBgpjnT9XBgphnTdXBst+XtepgQczdpupg0Xam6mBZbmvVwZL9NvUecT7mw2vVQZKfyNCgOSPJH3bWwPwx5tCOGpiPmXxNWBAz+ZqwIGb/jhqYj3lmRw1ssZ2OGlh6bF6jBuZjev6/OB/z1bVqYBzfI8kZ4+1rZnivd2aSx83MuV2Sd/fMT7JXhteXD2e4iHV1xNwxyekZjnf/lORaHTE3S/LFXPZ6trEj5oUz889N8pyOmJ9N8qFx7H+uVRuttZ1vZVJVHZbhTe++Sfasqlsvmtdae21r7YPj3Y1JPp+hkXDxssdeEPOPc/e/2hHz1dbaR6vq3hkO4v/QE1NV903yrSQLL0i+IOYHSQ6tqr+rqrdX1Raf3Lcg5mtJ/izJeVX1kM7tfDVJqupmSfZorZ3dEXOdJLetqusnuXmG//RNxVw7wy9bxm1eb0F6t0/yjNbai5OcmuS+maiF1tqH5n4e7890HczHHNtRB/Mx/9BRB1vEdNTBfMy3M10H8zEfzXQdbJFbMlkH8zFfz3QdzMd8LdN1sNkrkuzae0yY27eXZKIO5mOW/W5MxCzcj2vktcX9judyj0zUwHxMDas81qyBZbmsVQMLYv47EzWwIOY2ma6BOyZ5R2tt/9ba/kl+JMl+GfbxF6vqfgtiLrefMjSkjkxy3pKnvyjmlNbaseP3ltXAfMxnWmsnV9WdM7xJ/ZupmPEviT+VoUn2sc7c7plk3/Gvde+rquv2bCfDf7z/T5JfrqpdOmNSVbsmuX9r7cSO3C5Kcuuq2j3DcfzfOmJ+Oh3Hgqq6V5KbtNbe23MsWFD7k8eC+ZieY8GCmJ5jwRa/l1PHgwUxk8eD+Zie48GyOWsdDxbETB4PFsRMHg9aa6+bORackaGeJ18TZvPPUJOTrwlzz/nFPa8JczEvm6qDBTFn97wuzD2fu6fjdWEu5lPpeF2Yz23R/YntfDYdrwtzMRszfTz41SRva63dK8l1quo3M10H8zX5n5mug/mY13fUwXzMX3TUwRa/Lx11MB9zYKbrYD7mPzJdBwt/lyfqYD7m/EWPMRFzk6xdBy9L8ntjDfx4z2tCtjzG/DDTNTAf86WOGpiP2a2jBrY4/nXUwHzMJZmugUXH2akaWHhsnqiB+ZiLMl0D8zHfWrTdzWpYMf6WJLuNQ09LctZYAw+uqutU1S2T/EGG/Tk5P8kTkzy5tXbfMc99O2J+OsMfH1+Y5HMZ/rA4FXP3JC+eeT37wVRMa+0FM/P/Ocm7
O7bzqiSPzfC+8fCq+okF+/1SO10zKUO39Ljx9ocz/Adiqar6uSQ3aK2d3lq7qGcDMzEfXXR/KqaqKsOB6PsZfonXjMnwS/H8JM/uzS3DX2nu01rbL8N/2h/YEXObJP87w19871ZLTmuYfz7j0FEZuu09ub0tya0zdJzPzdAgmIp5UZIX1LCc8sAkfzs/d8Gb8Qekoxbmfx49dbDoZzhVB/MxPXUwN6fSUQdzMZ9MRx3MxTwyHXWwJP8162Au5rR01MFczFsyUQdjzOyL6f6ZqIMalhxfum9baxdP1cF8zMz40jpYFLNWHczPX7bNiW18PBM1sCDmUZmogTVyWVoDC2L+LhM1sCDm+EzXwHzj4b5JTmittQx/bbnXgpj5/fSA1tqXFz2PNWIeOOb8k5u32RuT5OAML+7f7ozZvLqvN7e9k/zC+Cb6tCSP6Yh5UIZG/vOT7J6hHnqfz68m+cvO3G6R4a9yv5bh9/ZzHTF/neTZNZyu9/gMb5AuZ0HjYf9MvyZcrvaTPLrjNWHh78vEa8IWMR2vCfMxT8/0a8L887lRpl8T5mOesuj59eyDrP2aML+dW2X6NWE+5oR0vCYkl2s83CL97xOPSvK6nteE+ZiZ7U6+R5yN6X2PuDmm53VhwXYmXxcWxEy+Lix6PkvurxUz+bqwIKbndWG+8bB3putgPpcLOupgYf4TdbBFTEcdzMdclOk6mI95f6brYD7moZmug2U/w7XqYD7mtCWPsVbMO7J2Hcw3Hv4w0zWwRW111MDCepyogS1iOmpgPuYjma6B+Zh/ynQNzMfsnekaWPY7uVYNzMeclukamI9505Ltbjb/B6L9c1kNnJlh5d03khzeO7+19tzW2r+MYzdKcmFHzDuTnF9VD8rw/9z5P1gsyuseSZ5SVf9QVX/c+VySJFV11yRfyrCyaSrmhq21/xjfM/93kkV/fLxMm1i6dFX7ylBE+4y375/k2WvMvWGSs5LcYmbstInHv1zMosfo2c44/ntJHjYVk+HA8ItT+c3F/MjM+NOyZJnaXMyrkxzYLlvet3D54oJ9cI0MK2269luGpYrXHcefmeTIzu3sl+E/DGudnlBJXpPh4HFsby3M/zym6mA+pqcOlv3c16qDuTkv6KmDNbaztA7mYv6lpw4W7IPJOpiL+XZPHSzYzpp1kOE/pacluf747+QxYdnv2MTv2xYxU3WwbDvL6mB+/lrxa8RMHgsWxEweC5Y8/zVrYMF2Jo8FS7YzVQN3TXLT8fZrMvzuPGS8f5uMpzjMxSzcTxM1sEVMhlVQp2f5KTpLfx4ZmiJHd8S8IMlvTNTBfMzRyaWf4Hpwkld1xjxpvL9rFizrX2O/fSTDX1Z7cvt2kjuO9w9L8pLOff2zSd6Y5I1LtvP4JCdm+Gv1izO88dpn/N6yY8HC2p+ogy1iMn0sWPo7liWvCQtifpjp48F8zEnL6m+NmA8vy3ViH0wdD7bILdPHg0XbmXxvMM5/SZKfT+f7xEX5r1UHi2Km6mDZdtaqg/mY9L9HnI3pfY84G9P7HnF+H/S8R5zdTu97xPntTL0u3CJDg/v5GU5F6XlvsDCXif28RcxUHaz1nJfVwYKYnvcH8zFPnPnesuPBfMz/m6qDJftg6ngwH3PBVB0s2c7SOkjyvAyvBQdn+M9777Fgi8dcqwYWxUzVwFo1vKwG5mN6amBBTO+xYDam91gwvw96jgWz2+k9Fsxvp+f/i6eN//5tkuuNt4/MePr8/D7snP+wDGerdMVk+H/KWzL8cewaUzEZXr+uM469L5e9b+rJ7W1JbtmTW4b3VU9N8isZVjPVWj+znXFl0jczvOlNhr+kLnyO419yjkvynNba+T0PPB/T8xgLYo6uqkeN375+hi7wVG73S3JUVZ2W5E5V9caOmLfWcFHTXTKcOvHJjpjPJvnJ8dubMiwxnYpJhr/yLzvVYlHMtZPcYczt7kla53bOyXBe6h8t21YbHJWhu3qPTNRCz8+jM2aqDuZj
9u6og/mYAzNdB/Mxr++og/mYN2e6Dhbtg6k6mI/5TqbrYNF2zsnadfDsJK9prX19vN9zTJj8HeuI+fNMH1PmY9pEHVxufobVJFN5zse8f6oGFsQ8KRM1sOC5vDETNbBgO3tmogaWbOecrF0Dn2qtXTDePjd9NTB5zOyMOTbDm4mzOmO+WFW/PX7v+ll8DJqPeWKSQ2b2yckdMQ/P8AY6SX5xyfObj/n3TNfBFvugqvZOclFr7Vud++DcDKfsJcPy8EV1sGhfn5PkZzK8gV7kzkmOaa19JcMbqdMzXQeTr4OdMVPHgvmYQztei+ZjPpnp48F8TDrqfD7m5xc8v6mY8zN9PNgit0wfDxZt55xMvDeoqmsk+fnW2kfS+T6xI/81Y7bifeZsTO97ktncel+/ZmN6j3ezMb2/G/P7rWc/zs6ZfI+45HHPydp18JIMzfHfzXDM+ZVM10FvLmvFVKbrYD7mBh11MB/z5EzXwXzM73fUwXzMCzJdB4v221QdzMf86ILH6NnOOVlSB621F2VYjfWEDP+J7z0WLH3MNVwasxXHgtmY3mPBbG69x4LZmN5jwWxM77FgNibpOxbMxvT+/s1vZ/7+WnprYM35NaxIf1aSX++Naa19vbX26Az/H7prR8yZrbVvjGPnZli11ZPb9ZP8WGvt3ztze+L4+E/NcD2ttY97a3WaropfGZbhPmu8/cKMF+5aMO/JGZbLnTZ+Ta5IWRDzgkWPMbWdDKegnZ7hoqBbdPuW5bZWfkty+1SGjuKLO2MenaE7enqGc3Nv1vl8XpIFF1ldI+boJJ/JUMAfTLJ753ZemORX19jO0UkeNd5+1fh81qyFXHZK4OV+HhN1MB+z9Oe1RszC7fbkNlEH8zF36KiD+ZjrdNTBFrl11MF8zN076mDRdqbq4PSZn8XXM5wyM3lMWLRv16qD+ZieOlgQM1kHy3LpyW3cxs9M1cCCmMkaWJTLVA0s2M7dpmpgyXamauC4DOef75JhVcXzM154O8N54L+1IGbhflprP8/HJDkowyqbzTXw9I6Ya2U4Vef0JO9M8qO9ua2V34Lt3DTDm7hPZzj165odMbuMc09P8okkd+jJLcNft565Ffvtlkn+McMpbh9bVG9LtvPYJL+9xnZ+PclTxtu/muHaiFOvCQtrf6IO5mOem+nXhC22k+nXhKW/l2vUwXzMfZbV0hoxt1i23YnnM/WasCi3qdeERdtZ83gwxt0nyZ+Mt3vfJ26R/1p1MB+TzteEuZiu14Rl+3aiTme30/W6MBfT9bown9tUHSzYTtfrwoLtTL0unJThGiC7ZLgw7wum6mBZLhP7eT7maVN1sCDm5lN1sNZ+WpbfgpjJOlgQ0/MecYvcpupgQcy9p+pgyXam6mD3DK9n107/sWCLx1yrBuZj0n8smI3pPRYsfL4TNTq7nd5jwWxM77HgcrlN1cCC7fQeC+a30/OacNr47/MzfuhHhgbjvov24Vrzx5/VxzL3Hmki5nVJ7j2OvTfJbTtiTsvwXu7aGd7LTcaMtx+d5Ne35vlnOAaduazuZr82/8d5p1HDRUXPyLBs66AMV3S/aMdmxfZQw0XSjstwmsmnM1yx/vSohaut8a8zh8Qx4Wqlqn4mw+kMleQ9SX47Qw2clWGF34Gttc/vuAzZHmq4kOSfZ7hOzjUzrM56TxwLrnaq6iUZLjJ6oveJV09VdbcMK0dvkeE/wIdHHVztVNULk3y2tfZWx4Krr6o6rbW2f1XdIsOnPH4oQ8PmHq21S2bnTM3P0CR7VJJ/HR/+Ba21/zURs1eGT91rSf6mtfZ7Hdu5d4Ym1PcyrLp+dc9zqaq/TPKK1trm64X1xLwlwyUEzpjclztbMym5tKlwQJLT27C8nasptUCiDrj008UelOG6P4su8MzVgGMBiTpgoA5QA1TVnhmus3RqTzNxa+fvjDGXi98Zm0kAAAAArMbOeAFuAAAAAFZEMwkAAACAbppJAABXkqr6kaq65VbG7Do9
CwBg/dBMAgC48vxKklct+2ZV/XhVHVBVT5wZflNVHbj61AAArhwuwA0AcCWoqo1Jzkny70kuTnLTJBdk+OPdrq21n6+q4zJ8vO+rk7wryYuTvD3Jt5I8qrX2w+2fOQDA1tFMAgDYRlV1zSTvS/Lh1trvj2Mfba3dY2bOxgyNpL9McvMkb0vyxCSfSPLdJIcn+fXW2ve3c/oAAFtFMwkAYBtV1d5JHpfkukluNQ7fM8k/JNmQ5D1JPpPkDhlWLF2c5JIk+yQ5Lcl/Z1jB9KOttbdtx9QBALaaZhIAwJWkqj6c5P6ttR8sWJn0kxkaTrsmeXOSJyc5Kcl1kvxEkk+11j64/bMGANg6LsANALCNqmqXqtolycK/0lXVNZJUkh8m2ZTkPkl2S/KNDKuUdstwmhsAwLq3YUcnAACwEzgyyUOSfDvJu6oqSW5bVSeP398lwyqk+yd5a5IfzdB4ul6SGya5d5KHb+ecAQCuEKe5AQCsQFV9rLV295n7t0zysiR/n+QmGa6d9Ngkxyf5cmvtqB2SKADAVnKaGwDAauw+d/+/MnyS298k+d0k12ytXZLkEUnuWFU32s75AQBcIVYmAQDsYFW1y9hYAgBY9zSTAAAAAOjmNDcAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHT7/7ayEy+RSs91AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1440x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the age distribution: one bar per distinct age, ordered by age.\n",
    "# The counts are computed once and reused for both axes. x and y are passed by\n",
    "# keyword because seaborn deprecated positional data arguments: the installed\n",
    "# version emits a FutureWarning, and from 0.12 on only `data` may be positional.\n",
    "age_counts = df_train['age'].value_counts().sort_index()\n",
    "plt.figure(figsize=(20,8))\n",
    "sns.barplot(x=age_counts.index, y=age_counts.values)\n",
    "plt.title(\"df_train里面的age分布情况\")\n",
    "plt.xlabel(\"年龄\")\n",
    "plt.ylabel(\"数量\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (40.0, 50.0]\n",
       "2         (25.0, 40.0]\n",
       "3         (25.0, 40.0]\n",
       "4         (25.0, 40.0]\n",
       "5         (40.0, 50.0]\n",
       "              ...     \n",
       "149996     (70.0, inf]\n",
       "149997    (40.0, 50.0]\n",
       "149998    (50.0, 60.0]\n",
       "149999    (25.0, 40.0]\n",
       "150000    (60.0, 70.0]\n",
       "Name: bin_age, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# With the age distribution in mind, cut age into 6 right-closed bins:\n",
    "# (-inf, 25], (25, 40], (40, 50], (50, 60], (60, 70], (70, inf].\n",
    "age_bins = [float('-inf'), 25, 40, 50, 60, 70, float('inf')]\n",
    "df_train['bin_age'] = pd.cut(df_train['age'], bins=age_bins, right=True)\n",
    "# Show the result: e.g. the first row (age 45) lands in (40, 50].\n",
    "df_train['bin_age']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>bin_age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>45</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>38</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>30</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>49</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>74</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>44</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>58</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>30</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>64</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age       bin_age\n",
       "1        45  (40.0, 50.0]\n",
       "2        40  (25.0, 40.0]\n",
       "3        38  (25.0, 40.0]\n",
       "4        30  (25.0, 40.0]\n",
       "5        49  (40.0, 50.0]\n",
       "...     ...           ...\n",
       "149996   74   (70.0, inf]\n",
       "149997   44  (40.0, 50.0]\n",
       "149998   58  (50.0, 60.0]\n",
       "149999   30  (25.0, 40.0]\n",
       "150000   64  (60.0, 70.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show age next to its bin to check the assignment; the remaining features\n",
    "# are binned following the same pattern.\n",
    "df_train.loc[:, ['age', 'bin_age']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0     90826\n",
       "1.0     26316\n",
       "2.0     19522\n",
       "3.0      9483\n",
       "4.0      2862\n",
       "5.0       746\n",
       "6.0       158\n",
       "7.0        51\n",
       "8.0        24\n",
       "9.0         5\n",
       "10.0        5\n",
       "13.0        1\n",
       "20.0        1\n",
       "Name: NumberOfDependents, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NumberOfDependents will be cut into 6 bins: [-math.inf, 2, 4, 6, 8, 10, math.inf].\n",
    "# Look at the value distribution first: 0 dependents is by far the most common,\n",
    "# and the largest value is 20. No plot is needed here; binning follows directly.\n",
    "df_train['NumberOfDependents'].value_counts(sort=True, ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (-inf, 2.0]\n",
       "2         (-inf, 2.0]\n",
       "3         (-inf, 2.0]\n",
       "4         (-inf, 2.0]\n",
       "5         (-inf, 2.0]\n",
       "             ...     \n",
       "149996    (-inf, 2.0]\n",
       "149997    (-inf, 2.0]\n",
       "149998    (-inf, 2.0]\n",
       "149999    (-inf, 2.0]\n",
       "150000    (-inf, 2.0]\n",
       "Name: bin_NumberOfDependents, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 2.0] < (2.0, 4.0] < (4.0, 6.0] < (6.0, 8.0] < (8.0, 10.0] < (10.0, inf]]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cut the number of dependents into 6 right-closed bins:\n",
    "# (-inf, 2], (2, 4], (4, 6], (6, 8], (8, 10], (10, inf].\n",
    "dependent_bins = [float('-inf'), 2, 4, 6, 8, 10, float('inf')]\n",
    "df_train['bin_NumberOfDependents'] = pd.cut(df_train['NumberOfDependents'], bins=dependent_bins, right=True)\n",
    "# Show the resulting bin assigned to every row.\n",
    "df_train['bin_NumberOfDependents']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>2.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfDependents bin_NumberOfDependents\n",
       "1                      2.0            (-inf, 2.0]\n",
       "2                      1.0            (-inf, 2.0]\n",
       "3                      0.0            (-inf, 2.0]\n",
       "4                      0.0            (-inf, 2.0]\n",
       "5                      0.0            (-inf, 2.0]\n",
       "...                    ...                    ...\n",
       "149996                 0.0            (-inf, 2.0]\n",
       "149997                 2.0            (-inf, 2.0]\n",
       "149998                 0.0            (-inf, 2.0]\n",
       "149999                 0.0            (-inf, 2.0]\n",
       "150000                 0.0            (-inf, 2.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train[['NumberOfDependents','bin_NumberOfDependents']]#这样就分好箱了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     126018\n",
       "1      16033\n",
       "2       4598\n",
       "3       1754\n",
       "4        747\n",
       "5        342\n",
       "98       264\n",
       "6        140\n",
       "7         54\n",
       "8         25\n",
       "9         12\n",
       "96         5\n",
       "10         4\n",
       "12         2\n",
       "13         1\n",
       "11         1\n",
       "Name: NumberOfTime30-59DaysPastDueNotWorse, dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于3种逾期次数，即 NumberOfTime30-59DaysPastDueNotWorse， NumberOfTime60-89DaysPastDueNotWorse， NumberOfTimes90DaysLate，分成10段\n",
    "#[-math.inf,1,2,3,4,5,6,7,8,9,math.inf]\n",
    "#还是先看下数据的分布\n",
    "df_train['NumberOfTime30-59DaysPastDueNotWorse'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     142396\n",
       "1       5731\n",
       "2       1118\n",
       "3        318\n",
       "98       264\n",
       "4        105\n",
       "5         34\n",
       "6         16\n",
       "7          9\n",
       "96         5\n",
       "8          2\n",
       "11         1\n",
       "9          1\n",
       "Name: NumberOfTime60-89DaysPastDueNotWorse, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberOfTime60-89DaysPastDueNotWorse'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     141662\n",
       "1       5243\n",
       "2       1555\n",
       "3        667\n",
       "4        291\n",
       "98       264\n",
       "5        131\n",
       "6         80\n",
       "7         38\n",
       "8         21\n",
       "9         19\n",
       "10         8\n",
       "11         5\n",
       "96         5\n",
       "13         4\n",
       "12         2\n",
       "14         2\n",
       "15         2\n",
       "17         1\n",
       "Name: NumberOfTimes90DaysLate, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberOfTimes90DaysLate'].value_counts() #数据大致探索了 还是要分箱吧"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime30-59DaysPastDueNotWorse  NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse  NumberOfTimes90DaysLate bin_NumberOfTimes90DaysLate\n",
       "1                                          2                               (1.0, 2.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "2                                          0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "3                                          1                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        1                 (-inf, 1.0]\n",
       "4                                          0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "5                                          1                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "...                                      ...                                      ...                                   ...                                      ...                      ...                         ...\n",
       "149996                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149997                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149998                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149999                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "150000                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "\n",
       "[150000 rows x 6 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#3）对于3种逾期次数，即 NumberOfTime30-59DaysPastDueNotWorse，NumberOfTime60-89DaysPastDueNotWorse，NumberOfTimes90DaysLate，分成10段\n",
    "dpd_bins=[-math.inf,1,2,3,4,5,6,7,8,9,math.inf]#这里是分成了 0-1 1-2 2-3 ...8-9 9-正无穷\n",
    "df_train['bin_NumberOfTime30-59DaysPastDueNotWorse']=pd.cut(df_train['NumberOfTime30-59DaysPastDueNotWorse'],bins=dpd_bins)\n",
    "df_train['bin_NumberOfTime60-89DaysPastDueNotWorse']=pd.cut(df_train['NumberOfTime60-89DaysPastDueNotWorse'],bins=dpd_bins)\n",
    "df_train['bin_NumberOfTimes90DaysLate']=pd.cut(df_train['NumberOfTimes90DaysLate'],bins=dpd_bins)\n",
    "\n",
    "#查看分箱情况\n",
    "df_train[['NumberOfTime30-59DaysPastDueNotWorse','bin_NumberOfTime30-59DaysPastDueNotWorse',\\\n",
    "          'NumberOfTime60-89DaysPastDueNotWorse','bin_NumberOfTime60-89DaysPastDueNotWorse',\\\n",
    "          'NumberOfTimes90DaysLate','bin_NumberOfTimes90DaysLate']]#这样就完成了字段的分箱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>13</td>\n",
       "      <td>(2.0, 54.0]</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>2</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>5</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>7</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>18</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>8</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       bin_RevolvingUtilizationOfUnsecuredLines  RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    DebtRatio    bin_MonthlyIncome  MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans  NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines  NumberRealEstateLoansOrLines\n",
       "1                              (0.699, 50708.0]                              0.766127     (0.468, 4.0]     0.802982  (8250.0, 3008750.0]         9120.0                        (12.0, 58.0]                               13                      (2.0, 54.0]                             6\n",
       "2                              (0.699, 50708.0]                              0.957151  (-0.001, 0.134]     0.121876     (-0.001, 3400.0]         2600.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             0\n",
       "3                                (0.271, 0.699]                              0.658180  (-0.001, 0.134]     0.085113     (-0.001, 3400.0]         3042.0                       (-0.001, 4.0]                                2                    (-0.001, 1.0]                             0\n",
       "4                               (0.0832, 0.271]                              0.233810  (-0.001, 0.134]     0.036050     (-0.001, 3400.0]         3300.0                          (4.0, 6.0]                                5                    (-0.001, 1.0]                             0\n",
       "5                              (0.699, 50708.0]                              0.907239  (-0.001, 0.134]     0.024926  (8250.0, 3008750.0]        63588.0                          (6.0, 9.0]                                7                    (-0.001, 1.0]                             1\n",
       "...                                         ...                                   ...              ...          ...                  ...            ...                                 ...                              ...                              ...                           ...\n",
       "149996                         (0.0192, 0.0832]                              0.040674   (0.134, 0.287]     0.225131     (-0.001, 3400.0]         2100.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             1\n",
       "149997                           (0.271, 0.699]                              0.299745     (0.468, 4.0]     0.716562     (5400.0, 8250.0]         5584.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             1\n",
       "149998                          (0.0832, 0.271]                              0.246044  (4.0, 329664.0]  3870.000000     (3400.0, 5400.0]         5400.0                        (12.0, 58.0]                               18                    (-0.001, 1.0]                             1\n",
       "149999                         (-0.001, 0.0192]                              0.000000  (-0.001, 0.134]     0.000000     (5400.0, 8250.0]         5716.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             0\n",
       "150000                         (0.699, 50708.0]                              0.850283   (0.134, 0.287]     0.249908     (5400.0, 8250.0]         8158.0                          (6.0, 9.0]                                8                       (1.0, 2.0]                             2\n",
       "\n",
       "[150000 rows x 10 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#接下来对其余字段进行一下分箱处理\n",
    "#对于其余字段，即RevolvingUtilizationOfUnsecuredLines, DebtRatio,MonthlyIncome, NumberOfOpenCreditLinesAndLoans,NumberRealEstateLoansOrLines \n",
    "#把他们分成5段 采用pd.qcut()函数 按照频率来分一下  q=5表示分成5段  \n",
    "#duplicates='drop'表示如果数据分布特别不均匀 按照频率分不了5段的时候 合并一下\n",
    "df_train['bin_RevolvingUtilizationOfUnsecuredLines']=pd.qcut(df_train['RevolvingUtilizationOfUnsecuredLines'],q=5,duplicates='drop')\n",
    "df_train['bin_DebtRatio']=pd.qcut(df_train['DebtRatio'],q=5,duplicates='drop')\n",
    "df_train['bin_MonthlyIncome']=pd.qcut(df_train['MonthlyIncome'],q=5,duplicates='drop')\n",
    "df_train['bin_NumberOfOpenCreditLinesAndLoans']=pd.qcut(df_train['NumberOfOpenCreditLinesAndLoans'],q=5,duplicates='drop')\n",
    "df_train['bin_NumberRealEstateLoansOrLines']=pd.qcut(df_train['NumberRealEstateLoansOrLines'],q=5,duplicates='drop')\n",
    "\n",
    "df_train[['bin_RevolvingUtilizationOfUnsecuredLines','RevolvingUtilizationOfUnsecuredLines',\\\n",
    "          'bin_DebtRatio','DebtRatio',\\\n",
    "          'bin_MonthlyIncome','MonthlyIncome',\\\n",
    "          'bin_NumberOfOpenCreditLinesAndLoans','NumberOfOpenCreditLinesAndLoans',\\\n",
    "          'bin_NumberRealEstateLoansOrLines','NumberRealEstateLoansOrLines']]#这样就完成了字段的分箱 分成了5段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-0.001, 1.0]    108526\n",
       "(1.0, 2.0]        31522\n",
       "(2.0, 54.0]        9952\n",
       "Name: bin_NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#哪一个字段来看看\n",
    "df_train['bin_NumberRealEstateLoansOrLines'].value_counts()#这里本来是分成5段 但是 duplicates='drop' 这里就合并了一下 因为数据分布问题\n",
    "#分成了3段 这样是不合理的  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     56188\n",
       "1     52338\n",
       "2     31522\n",
       "3      6300\n",
       "4      2170\n",
       "5       689\n",
       "6       320\n",
       "7       171\n",
       "8        93\n",
       "9        78\n",
       "10       37\n",
       "11       23\n",
       "12       18\n",
       "13       15\n",
       "14        7\n",
       "15        7\n",
       "16        4\n",
       "17        4\n",
       "25        3\n",
       "18        2\n",
       "19        2\n",
       "20        2\n",
       "23        2\n",
       "32        1\n",
       "21        1\n",
       "26        1\n",
       "29        1\n",
       "54        1\n",
       "Name: NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberRealEstateLoansOrLines'].value_counts()#按照人数来分 3万人一份  这里0和1 都占了4份 然后合并了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(3.0, inf]</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       bin_NumberRealEstateLoansOrLines  NumberRealEstateLoansOrLines\n",
       "1                            (3.0, inf]                             6\n",
       "2                           (-inf, 0.0]                             0\n",
       "3                           (-inf, 0.0]                             0\n",
       "4                           (-inf, 0.0]                             0\n",
       "5                            (0.0, 1.0]                             1\n",
       "...                                 ...                           ...\n",
       "149996                       (0.0, 1.0]                             1\n",
       "149997                       (0.0, 1.0]                             1\n",
       "149998                       (0.0, 1.0]                             1\n",
       "149999                      (-inf, 0.0]                             0\n",
       "150000                       (1.0, 2.0]                             2\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loans_bins=[-math.inf,0,1,2,3,math.inf]#分成了 负无穷-0  0-1 1-2 2-3 3-正无穷 5段\n",
    "df_train['bin_NumberRealEstateLoansOrLines']=pd.cut(df_train['NumberRealEstateLoansOrLines'],bins=loans_bins)#这样重新分一下\n",
    "df_train[['bin_NumberRealEstateLoansOrLines','NumberRealEstateLoansOrLines']]#这样分成了5段貌似更合理一些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-inf, 0.0]    56188\n",
       "(0.0, 1.0]     52338\n",
       "(1.0, 2.0]     31522\n",
       "(2.0, 3.0]      6300\n",
       "(3.0, inf]      3652\n",
       "Name: bin_NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_NumberRealEstateLoansOrLines'].value_counts()#这个分起来 好像更合理一点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149998                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]\n",
       "\n",
       "[150000 rows x 21 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10000)\n",
    "#现在这个时候再看看df_train\n",
    "df_train#本来是11个字段 出来要预测的那个值之外 其余的全部做了一个分箱  所以就变成了21个字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['SeriousDlqin2yrs', 'RevolvingUtilizationOfUnsecuredLines', 'age',\n",
       "       'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio',\n",
       "       'MonthlyIncome', 'NumberOfOpenCreditLinesAndLoans',\n",
       "       'NumberOfTimes90DaysLate', 'NumberRealEstateLoansOrLines',\n",
       "       'NumberOfTime60-89DaysPastDueNotWorse', 'NumberOfDependents',\n",
       "       'bin_age', 'bin_NumberOfDependents',\n",
       "       'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       "       'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       "       'bin_NumberOfTimes90DaysLate',\n",
       "       'bin_RevolvingUtilizationOfUnsecuredLines', 'bin_DebtRatio',\n",
       "       'bin_MonthlyIncome', 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       "       'bin_NumberRealEstateLoansOrLines'], dtype=object)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.columns.values#除了SeriousDlqin2yrs是我们要预测的目标值 所以它没有分享  其余的全部分箱了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['bin_age',\n",
       " 'bin_NumberOfDependents',\n",
       " 'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTimes90DaysLate',\n",
       " 'bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'bin_DebtRatio',\n",
       " 'bin_MonthlyIncome',\n",
       " 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#统计一下分箱字段\n",
    "#统计分箱字段\n",
    "bin_cols=[c for c in df_train.columns.values if c.startswith('bin_')]\n",
    "print(len(bin_cols))\n",
    "bin_cols#果然是10个"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3计算WOE与IV值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (40.0, 50.0]\n",
       "2         (25.0, 40.0]\n",
       "3         (25.0, 40.0]\n",
       "4         (25.0, 40.0]\n",
       "5         (40.0, 50.0]\n",
       "              ...     \n",
       "149996     (70.0, inf]\n",
       "149997    (40.0, 50.0]\n",
       "149998    (50.0, 60.0]\n",
       "149999    (25.0, 40.0]\n",
       "150000    (60.0, 70.0]\n",
       "Name: bin_age, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#下面这几行代码 是下面函数的分布注释\n",
    "df_train['bin_age']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_age'].nunique()# nunique    number of unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(40.0, 50.0], (25.0, 40.0], (70.0, inf], (50.0, 60.0], (60.0, 70.0], (-inf, 25.0]]\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_age'].unique()#就是下面这6组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Interval(40.0, 50.0, closed='right')"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(df_train['bin_age'].unique())[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "(40.0, 50.0]\n",
      "1\n",
      "(25.0, 40.0]\n",
      "2\n",
      "(70.0, inf]\n",
      "3\n",
      "(50.0, 60.0]\n",
      "4\n",
      "(60.0, 70.0]\n",
      "5\n",
      "(-inf, 25.0]\n"
     ]
    }
   ],
   "source": [
    "#这样就遍历完了 里面的每一个分组\n",
    "for i in range(df_train['bin_age'].nunique()):\n",
    "    print(i)\n",
    "    print(list(df_train['bin_age'].unique())[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Variable         Value    All   Bad\n",
      "0  bin_age  (40.0, 50.0]  35037  2893\n",
      "1  bin_age  (25.0, 40.0]  32069  3296\n",
      "2  bin_age   (70.0, inf]  17636   398\n",
      "3  bin_age  (50.0, 60.0]  34806  2149\n",
      "4  bin_age  (60.0, 70.0]  27424   952\n",
      "5  bin_age  (-inf, 25.0]   3028   338\n"
     ]
    }
   ],
   "source": [
    "#计算一下IV值\n",
    "#计算IV,衡量一下变量的预测能力\n",
    "def cal_IV(df, feature, target):\n",
    "    lst = []\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']#变量名称 值 整个情况 bad的情况\n",
    "    # 对feature字段中的每个分箱的取值进行变量\n",
    "    for i in range(df[feature].nunique()): # unique代表不同的值，nunique = number of unique 不同值的个数\n",
    "        # feature字段比如bib_age的第i个分箱取值 \n",
    "        val = list(df[feature].unique())[i]\n",
    "        # 统计feature比如 ‘bin_age’， feature 对应的分为值 比如 (40.0, 50.0]， \n",
    "        #len(df[df[feature]==val])这个 值 的个数 是一个总数，这个 值 导致target=1的个数 取名Bad 我们这个数据集的target就是那个SeriousDlqin2yrs\n",
    "        lst.append([feature, val, len(df[df[feature]==val]), len(df[(df[feature]==val) & (df[target]==1)])])    \n",
    "    #上述几个指标拿出来了以后 做成一个dataFrame    \n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    print(data)\n",
    "cal_IV(df_train,'bin_age','SeriousDlqin2yrs')   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Variable         Value    All   Bad\n",
      "0  bin_age  (40.0, 50.0]  35037  2893\n",
      "1  bin_age  (25.0, 40.0]  32069  3296\n",
      "2  bin_age   (70.0, inf]  17636   398\n",
      "3  bin_age  (50.0, 60.0]  34806  2149\n",
      "4  bin_age  (60.0, 70.0]  27424   952\n",
      "5  bin_age  (-inf, 25.0]   3028   338\n",
      "新生成的data:\n",
      "   Variable         Value    All   Bad  Margin Bad  Margin Good       woe        iv    iv_sum\n",
      "0  bin_age  (40.0, 50.0]  35037  2893    0.288550     0.229643  0.228343  0.013451  0.240411\n",
      "1  bin_age  (25.0, 40.0]  32069  3296    0.328745     0.205560  0.469547  0.057841  0.240411\n",
      "2  bin_age   (70.0, inf]  17636   398    0.039697     0.123151 -1.132145  0.094483  0.240411\n",
      "3  bin_age  (50.0, 60.0]  34806  2149    0.214343     0.233308 -0.084782  0.001608  0.240411\n",
      "4  bin_age  (60.0, 70.0]  27424   952    0.094953     0.189121 -0.689003  0.064882  0.240411\n",
      "5  bin_age  (-inf, 25.0]   3028   338    0.033712     0.019218  0.562024  0.008146  0.240411\n",
      "排序之后的data：\n",
      "   Variable         Value    All   Bad  Margin Bad  Margin Good       woe        iv    iv_sum\n",
      "5  bin_age  (-inf, 25.0]   3028   338    0.033712     0.019218  0.562024  0.008146  0.240411\n",
      "1  bin_age  (25.0, 40.0]  32069  3296    0.328745     0.205560  0.469547  0.057841  0.240411\n",
      "0  bin_age  (40.0, 50.0]  35037  2893    0.288550     0.229643  0.228343  0.013451  0.240411\n",
      "3  bin_age  (50.0, 60.0]  34806  2149    0.214343     0.233308 -0.084782  0.001608  0.240411\n",
      "4  bin_age  (60.0, 70.0]  27424   952    0.094953     0.189121 -0.689003  0.064882  0.240411\n",
      "2  bin_age   (70.0, inf]  17636   398    0.039697     0.123151 -1.132145  0.094483  0.240411\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.24041120302785982"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#计算IV,衡量一下变量的预测能力\n",
    "def cal_IV(df, feature, target):\n",
    "    lst = []\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']#变量名称 值 整个情况 bad的情况\n",
    "    # 对feature字段中的每个分箱的取值进行变量\n",
    "    for i in range(df[feature].nunique()): # unique代表不同的值，nunique = number of unique 不同值的个数\n",
    "        # feature字段的第i个分箱取值\n",
    "        val = list(df[feature].unique())[i]\n",
    "        # 统计feature比如 ‘bin_age’， feature 对应的分为值 比如 (40.0, 50.0]， \n",
    "        #len(df[df[feature]==val])这个 值 的个数 是一个总数，这个 值 导致target=1的个数 取名Bad 我们这个数据集的target就是那个SeriousDlqin2yrs\n",
    "        lst.append([feature, val, len(df[df[feature]==val]), len(df[(df[feature]==val) & (df[target]==1)])])    \n",
    "    #上述几个指标拿出来了以后 做成一个dataFrame    \n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    print(data)\n",
    "    # 筛选bad大于0的情况    #从这里开始就是计算WOE和IV值的公式了\n",
    "    data = data[data['Bad']>0]  \n",
    "    #data['Share'] = data['All'] / data['All'].sum() # 这个value所占比例   感觉这里也不需要这个\n",
    "    #data['Bad Rate'] = data['Bad'] / data['All'] # 这个value导致bad情况，在该value个数的比例  感觉这个不需要\n",
    "    data['Margin Bad'] = data['Bad'] / data['Bad'].sum() #  Margin Bad  某一项bad占所有bad之和的比例\n",
    "    data['Margin Good'] = (data['All'] - data['Bad']) / (data['All'] - data['Bad']).sum() \n",
    "    # 如果需要避免分子为0，导致-inf, log1p(x) = log(1+x), 这里 NumberOfTime60-89DaysPastDueNotWorse 第8分箱里bad为0\n",
    "    data['woe'] = np.log(data['Margin Bad'] / data['Margin Good'])\n",
    "    data['iv']  = ((data['Margin Bad'] - data['Margin Good']) * data['woe'])\n",
    "    data['iv_sum']  = ((data['Margin Bad'] - data['Margin Good']) * data['woe']).sum()\n",
    "    print(\"新生成的data:\\n\",data)\n",
    "    data.sort_values(by=['Variable', 'Value'], inplace=True)\n",
    "    print(\"排序之后的data：\\n\",data)\n",
    "    return (data['iv_sum'].values[0])\n",
    "\n",
    "cal_IV(df_train,'bin_age','SeriousDlqin2yrs')   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bin_age 0.24041120302785982\n",
      "bin_NumberOfDependents 0.01450836007644442\n",
      "bin_NumberOfTime30-59DaysPastDueNotWorse 0.492444774570198\n",
      "bin_NumberOfTime60-89DaysPastDueNotWorse 0.2665587583516951\n",
      "bin_NumberOfTimes90DaysLate 0.49160685733515563\n",
      "bin_RevolvingUtilizationOfUnsecuredLines 1.0596188771423887\n",
      "bin_DebtRatio 0.05948761145809681\n",
      "bin_MonthlyIncome 0.05623446147714756\n",
      "bin_NumberOfOpenCreditLinesAndLoans 0.04802315528985505\n",
      "bin_NumberRealEstateLoansOrLines 0.06167337290177645\n"
     ]
    }
   ],
   "source": [
    "#计算IV,衡量一下变量的预测能力\n",
    "def cal_IV(df, feature, target):\n",
    "    lst = []\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']#变量名称 值 整个情况 bad的情况\n",
    "    for i in range(df[feature].nunique()):\n",
    "        val = list(df[feature].unique())[i]\n",
    "        lst.append([feature, val, len(df[df[feature]==val]), len(df[(df[feature]==val) & (df[target]==1)])])\n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    data = data[data['Bad']>0]\n",
    "    data['Margin Bad'] = data['Bad'] / data['Bad'].sum() #  Margin Bad  bad占所有value的比例\n",
    "    data['Margin Good'] = (data['All'] - data['Bad']) / (data['All'] - data['Bad']).sum() \n",
    "    data['woe'] = np.log(data['Margin Bad'] / data['Margin Good'])\n",
    "    data['iv']  = ((data['Margin Bad'] - data['Margin Good']) * data['woe'])\n",
    "    data['iv_sum']  = ((data['Margin Bad'] - data['Margin Good']) * data['woe']).sum()\n",
    "    data.sort_values(by=['Variable', 'Value'], inplace=True)\n",
    "    return (data['iv_sum'].values[0])\n",
    "#计算每个字段的iv值\n",
    "for f  in bin_cols:\n",
    "    print(f,cal_IV(df_train,f,'SeriousDlqin2yrs'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 只选择iv>0.1的值，入选的字段又5个\n",
    "* NumberOfTime30-59DaysPastDueNotWorse\n",
    "* NumberOfTime60-89DaysPastDueNotWorse\n",
    "* NumberOfTimes90DaysLate\n",
    "* bin_RevolvingUtilizationOfUnsecuredLines\n",
    "* bin_age"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4根据计算结果再探索一下数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'NumberOfTimes90DaysLate',\n",
       " 'RevolvingUtilizationOfUnsecuredLines',\n",
       " 'age']"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_cols=['NumberOfTime30-59DaysPastDueNotWorse','NumberOfTime60-89DaysPastDueNotWorse',\n",
    "              'NumberOfTimes90DaysLate','RevolvingUtilizationOfUnsecuredLines','age']\n",
    "feature_cols#我们把这5个字段组成一个 list  用 feature_cols 保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.699, 50708.0]    30000\n",
       "(0.271, 0.699]      30000\n",
       "(0.0832, 0.271]     30000\n",
       "(0.0192, 0.0832]    30000\n",
       "(-0.001, 0.0192]    30000\n",
       "Name: bin_RevolvingUtilizationOfUnsecuredLines, dtype: int64"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#下面这几句代码是对RevolvingUtilizationOfUnsecuredLines这个字段的探索 因为bin_RevolvingUtilizationOfUnsecuredLines的IV值= 1.0596188771423887\n",
    "#IV值大于0.5就高得有点可疑了 需要再看看是怎么回事\n",
    "#这个字段的意思是：除房地产和汽车贷款等无分期付款债务外， 信用卡和个人信用额度的总余额除以信贷限额\n",
    "df_train['bin_RevolvingUtilizationOfUnsecuredLines'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    150000.000000\n",
       "mean          6.048438\n",
       "std         249.755371\n",
       "min           0.000000\n",
       "25%           0.029867\n",
       "50%           0.154181\n",
       "75%           0.559046\n",
       "max       50708.000000\n",
       "Name: RevolvingUtilizationOfUnsecuredLines, dtype: float64"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['RevolvingUtilizationOfUnsecuredLines'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3321"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_train[df_train['RevolvingUtilizationOfUnsecuredLines']>1])#这是一个比例 一般是小于1的 但是这个地方大于1的还有3000多人 \n",
    "#而且最大值是50708  说明这个标签有一定的泄漏的意思   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.766127</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.957151</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.658180</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.233810</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.907239</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0.040674</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0.299745</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0.246044</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0.850283</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        RevolvingUtilizationOfUnsecuredLines bin_RevolvingUtilizationOfUnsecuredLines\n",
       "1                                   0.766127                         (0.699, 50708.0]\n",
       "2                                   0.957151                         (0.699, 50708.0]\n",
       "3                                   0.658180                           (0.271, 0.699]\n",
       "4                                   0.233810                          (0.0832, 0.271]\n",
       "5                                   0.907239                         (0.699, 50708.0]\n",
       "...                                      ...                                      ...\n",
       "149996                              0.040674                         (0.0192, 0.0832]\n",
       "149997                              0.299745                           (0.271, 0.699]\n",
       "149998                              0.246044                          (0.0832, 0.271]\n",
       "149999                              0.000000                         (-0.001, 0.0192]\n",
       "150000                              0.850283                         (0.699, 50708.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train[['RevolvingUtilizationOfUnsecuredLines','bin_RevolvingUtilizationOfUnsecuredLines']]#先看看分布情况\n",
    "#这个数字分段是一个比例"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5WOE编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149998                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]\n",
       "\n",
       "[150000 rows x 21 columns]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_new = df_train.copy()\n",
    "df_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['bin_age',\n",
       " 'bin_NumberOfDependents',\n",
       " 'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTimes90DaysLate',\n",
       " 'bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'bin_DebtRatio',\n",
       " 'bin_MonthlyIncome',\n",
       " 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bin_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">SeriousDlqin2yrs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bin_age</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>(-inf, 25.0]</th>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(25.0, 40.0]</th>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(40.0, 50.0]</th>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(50.0, 60.0]</th>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(60.0, 70.0]</th>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(70.0, inf]</th>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             SeriousDlqin2yrs       \n",
       "                          sum  count\n",
       "bin_age                             \n",
       "(-inf, 25.0]              338   3028\n",
       "(25.0, 40.0]             3296  32069\n",
       "(40.0, 50.0]             2893  35037\n",
       "(50.0, 60.0]             2149  34806\n",
       "(60.0, 70.0]              952  27424\n",
       "(70.0, inf]               398  17636"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#下面这几步是下面那几个大函数的分步走\n",
    "df_woe=df_new.groupby('bin_age').agg({'SeriousDlqin2yrs':['sum', 'count']})\n",
    "df_woe#这里是分组聚合 sum表示该组中label=1(违约)的样本数 count表示该组的总人数 比如(-inf, 25.0]这一组 sum=338 count=3028\n",
    "#count的和是150000    #因为这里是按照 SeriousDlqin2yrs来聚合的  所以sum的数量就是label=1的数量  就是要违约了  即bad的数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('SeriousDlqin2yrs', 'sum') ('SeriousDlqin2yrs', 'count')] <class 'numpy.ndarray'>\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrssum</th>\n",
       "      <th>SeriousDlqin2yrscount</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bin_age</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>(-inf, 25.0]</th>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(25.0, 40.0]</th>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(40.0, 50.0]</th>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(50.0, 60.0]</th>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(60.0, 70.0]</th>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(70.0, inf]</th>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              SeriousDlqin2yrssum  SeriousDlqin2yrscount\n",
       "bin_age                                                 \n",
       "(-inf, 25.0]                  338                   3028\n",
       "(25.0, 40.0]                 3296                  32069\n",
       "(40.0, 50.0]                 2893                  35037\n",
       "(50.0, 60.0]                 2149                  34806\n",
       "(60.0, 70.0]                  952                  27424\n",
       "(70.0, inf]                   398                  17636"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df_woe.columns.values,type(df_woe.columns.values))\n",
    "df_woe.columns=list(map(''.join, df_woe.columns.values))#map函数会把第一个参数(这里是''.join)依次作用到后面序列的每一个元素上\n",
    "df_woe#这里是把两层列名(例如('SeriousDlqin2yrs', 'sum'))拼接成一个字符串列名(例如'SeriousDlqin2yrssum')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_age</th>\n",
       "      <th>SeriousDlqin2yrssum</th>\n",
       "      <th>SeriousDlqin2yrscount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        bin_age  SeriousDlqin2yrssum  SeriousDlqin2yrscount\n",
       "0  (-inf, 25.0]                  338                   3028\n",
       "1  (25.0, 40.0]                 3296                  32069\n",
       "2  (40.0, 50.0]                 2893                  35037\n",
       "3  (50.0, 60.0]                 2149                  34806\n",
       "4  (60.0, 70.0]                  952                  27424\n",
       "5   (70.0, inf]                  398                  17636"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the current index into an ordinary column and use a fresh integer index\n",
    "# (in the displayed result, `bin_age` becomes a regular column).\n",
    "# NOTE: re-running this cell on its own output adds an extra `index` column.\n",
    "df_woe = df_woe.reset_index(drop=False)\n",
    "df_woe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bad</th>\n",
       "      <th>all</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        bin_age   bad    all\n",
       "0  (-inf, 25.0]   338   3028\n",
       "1  (25.0, 40.0]  3296  32069\n",
       "2  (40.0, 50.0]  2893  35037\n",
       "3  (50.0, 60.0]  2149  34806\n",
       "4  (60.0, 70.0]   952  27424\n",
       "5   (70.0, inf]   398  17636"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rename columns (not the index): the per-bin sum of the label becomes 'bad'\n",
    "# and the per-bin row count becomes 'all'.\n",
    "df_woe = df_woe.rename({'SeriousDlqin2yrs'+'sum': 'bad', 'SeriousDlqin2yrs'+'count': 'all'}, axis='columns')\n",
    "# 'bad' and 'all' per bin are the inputs needed to compute the WOE encoding next.\n",
    "df_woe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>bad_bin_age</th>\n",
       "      <th>all_bin_age</th>\n",
       "      <th>good_bin_age</th>\n",
       "      <th>Margin Bad_bin_age</th>\n",
       "      <th>Margin Good_bin_age</th>\n",
       "      <th>woe_bin_age</th>\n",
       "      <th>bad_bin_NumberOfDependents</th>\n",
       "      <th>all_bin_NumberOfDependents</th>\n",
       "      <th>good_bin_NumberOfDependents</th>\n",
       "      <th>Margin Bad_bin_NumberOfDependents</th>\n",
       "      <th>Margin Good_bin_NumberOfDependents</th>\n",
       "      <th>woe_bin_NumberOfDependents</th>\n",
       "      <th>bad_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>all_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>good_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>Margin Bad_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>Margin Good_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>woe_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bad_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>all_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>good_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>Margin Bad_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>Margin Good_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>woe_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bad_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>all_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>good_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>Margin Bad_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>Margin Good_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>woe_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bad_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>all_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>good_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>Margin Bad_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>Margin Good_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>woe_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bad_bin_DebtRatio</th>\n",
       "      <th>all_bin_DebtRatio</th>\n",
       "      <th>good_bin_DebtRatio</th>\n",
       "      <th>Margin Bad_bin_DebtRatio</th>\n",
       "      <th>Margin Good_bin_DebtRatio</th>\n",
       "      <th>woe_bin_DebtRatio</th>\n",
       "      <th>bad_bin_MonthlyIncome</th>\n",
       "      <th>all_bin_MonthlyIncome</th>\n",
       "      <th>good_bin_MonthlyIncome</th>\n",
       "      <th>Margin Bad_bin_MonthlyIncome</th>\n",
       "      <th>Margin Good_bin_MonthlyIncome</th>\n",
       "      <th>woe_bin_MonthlyIncome</th>\n",
       "      <th>bad_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>all_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>good_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>Margin Bad_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>Margin Good_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>woe_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bad_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>all_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>good_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>Margin Bad_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>Margin Good_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>woe_bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>1219</td>\n",
       "      <td>4598</td>\n",
       "      <td>3379</td>\n",
       "      <td>0.121584</td>\n",
       "      <td>0.024140</td>\n",
       "      <td>1.797837</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>2974</td>\n",
       "      <td>30109</td>\n",
       "      <td>27135</td>\n",
       "      <td>0.296629</td>\n",
       "      <td>0.193857</td>\n",
       "      <td>0.928274</td>\n",
       "      <td>1387</td>\n",
       "      <td>29993</td>\n",
       "      <td>28606</td>\n",
       "      <td>0.138340</td>\n",
       "      <td>0.204367</td>\n",
       "      <td>0.516960</td>\n",
       "      <td>1846</td>\n",
       "      <td>27684</td>\n",
       "      <td>25838</td>\n",
       "      <td>0.184121</td>\n",
       "      <td>0.184591</td>\n",
       "      <td>0.691873</td>\n",
       "      <td>419</td>\n",
       "      <td>3652</td>\n",
       "      <td>3233</td>\n",
       "      <td>0.041791</td>\n",
       "      <td>0.023097</td>\n",
       "      <td>1.032961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>2107</td>\n",
       "      <td>30000</td>\n",
       "      <td>27893</td>\n",
       "      <td>0.210154</td>\n",
       "      <td>0.199273</td>\n",
       "      <td>0.720083</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>877</td>\n",
       "      <td>30000</td>\n",
       "      <td>29123</td>\n",
       "      <td>0.087473</td>\n",
       "      <td>0.208060</td>\n",
       "      <td>0.350952</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>1573</td>\n",
       "      <td>26545</td>\n",
       "      <td>24972</td>\n",
       "      <td>0.156892</td>\n",
       "      <td>0.178405</td>\n",
       "      <td>0.630962</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>1387</td>\n",
       "      <td>29993</td>\n",
       "      <td>28606</td>\n",
       "      <td>0.138340</td>\n",
       "      <td>0.204367</td>\n",
       "      <td>0.516960</td>\n",
       "      <td>2017</td>\n",
       "      <td>37162</td>\n",
       "      <td>35145</td>\n",
       "      <td>0.201177</td>\n",
       "      <td>0.251082</td>\n",
       "      <td>0.588475</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149995</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "      <td>17238</td>\n",
       "      <td>0.039697</td>\n",
       "      <td>0.123151</td>\n",
       "      <td>0.279404</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>497</td>\n",
       "      <td>30000</td>\n",
       "      <td>29503</td>\n",
       "      <td>0.049571</td>\n",
       "      <td>0.210775</td>\n",
       "      <td>0.211221</td>\n",
       "      <td>1716</td>\n",
       "      <td>30000</td>\n",
       "      <td>28284</td>\n",
       "      <td>0.171155</td>\n",
       "      <td>0.202066</td>\n",
       "      <td>0.613576</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>2107</td>\n",
       "      <td>30000</td>\n",
       "      <td>27893</td>\n",
       "      <td>0.210154</td>\n",
       "      <td>0.199273</td>\n",
       "      <td>0.720083</td>\n",
       "      <td>2974</td>\n",
       "      <td>30109</td>\n",
       "      <td>27135</td>\n",
       "      <td>0.296629</td>\n",
       "      <td>0.193857</td>\n",
       "      <td>0.928274</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "      <td>32657</td>\n",
       "      <td>0.214343</td>\n",
       "      <td>0.233308</td>\n",
       "      <td>0.651655</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>877</td>\n",
       "      <td>30000</td>\n",
       "      <td>29123</td>\n",
       "      <td>0.087473</td>\n",
       "      <td>0.208060</td>\n",
       "      <td>0.350952</td>\n",
       "      <td>1653</td>\n",
       "      <td>29891</td>\n",
       "      <td>28238</td>\n",
       "      <td>0.164871</td>\n",
       "      <td>0.201737</td>\n",
       "      <td>0.597328</td>\n",
       "      <td>4031</td>\n",
       "      <td>59757</td>\n",
       "      <td>55726</td>\n",
       "      <td>0.402055</td>\n",
       "      <td>0.398117</td>\n",
       "      <td>0.698081</td>\n",
       "      <td>1846</td>\n",
       "      <td>27684</td>\n",
       "      <td>25838</td>\n",
       "      <td>0.184121</td>\n",
       "      <td>0.184591</td>\n",
       "      <td>0.691873</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>582</td>\n",
       "      <td>30000</td>\n",
       "      <td>29418</td>\n",
       "      <td>0.058049</td>\n",
       "      <td>0.210168</td>\n",
       "      <td>0.243890</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "      <td>26472</td>\n",
       "      <td>0.094953</td>\n",
       "      <td>0.189121</td>\n",
       "      <td>0.406848</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1716</td>\n",
       "      <td>30000</td>\n",
       "      <td>28284</td>\n",
       "      <td>0.171155</td>\n",
       "      <td>0.202066</td>\n",
       "      <td>0.613576</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>2017</td>\n",
       "      <td>37162</td>\n",
       "      <td>35145</td>\n",
       "      <td>0.201177</td>\n",
       "      <td>0.251082</td>\n",
       "      <td>0.588475</td>\n",
       "      <td>1765</td>\n",
       "      <td>31522</td>\n",
       "      <td>29757</td>\n",
       "      <td>0.176042</td>\n",
       "      <td>0.212589</td>\n",
       "      <td>0.603269</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines  bad_bin_age  all_bin_age  good_bin_age  Margin Bad_bin_age  Margin Good_bin_age  woe_bin_age  bad_bin_NumberOfDependents  all_bin_NumberOfDependents  good_bin_NumberOfDependents  Margin Bad_bin_NumberOfDependents  Margin Good_bin_NumberOfDependents  woe_bin_NumberOfDependents  bad_bin_NumberOfTime30-59DaysPastDueNotWorse  all_bin_NumberOfTime30-59DaysPastDueNotWorse  good_bin_NumberOfTime30-59DaysPastDueNotWorse  Margin Bad_bin_NumberOfTime30-59DaysPastDueNotWorse  Margin Good_bin_NumberOfTime30-59DaysPastDueNotWorse  woe_bin_NumberOfTime30-59DaysPastDueNotWorse  bad_bin_NumberOfTime60-89DaysPastDueNotWorse  all_bin_NumberOfTime60-89DaysPastDueNotWorse  good_bin_NumberOfTime60-89DaysPastDueNotWorse  Margin Bad_bin_NumberOfTime60-89DaysPastDueNotWorse  Margin Good_bin_NumberOfTime60-89DaysPastDueNotWorse  woe_bin_NumberOfTime60-89DaysPastDueNotWorse  bad_bin_NumberOfTimes90DaysLate  all_bin_NumberOfTimes90DaysLate  good_bin_NumberOfTimes90DaysLate  Margin Bad_bin_NumberOfTimes90DaysLate  Margin Good_bin_NumberOfTimes90DaysLate  woe_bin_NumberOfTimes90DaysLate  bad_bin_RevolvingUtilizationOfUnsecuredLines  all_bin_RevolvingUtilizationOfUnsecuredLines  good_bin_RevolvingUtilizationOfUnsecuredLines  Margin Bad_bin_RevolvingUtilizationOfUnsecuredLines  Margin Good_bin_RevolvingUtilizationOfUnsecuredLines  woe_bin_RevolvingUtilizationOfUnsecuredLines  bad_bin_DebtRatio  all_bin_DebtRatio  
good_bin_DebtRatio  Margin Bad_bin_DebtRatio  Margin Good_bin_DebtRatio  woe_bin_DebtRatio  bad_bin_MonthlyIncome  all_bin_MonthlyIncome  good_bin_MonthlyIncome  Margin Bad_bin_MonthlyIncome  Margin Good_bin_MonthlyIncome  woe_bin_MonthlyIncome  bad_bin_NumberOfOpenCreditLinesAndLoans  all_bin_NumberOfOpenCreditLinesAndLoans  good_bin_NumberOfOpenCreditLinesAndLoans  Margin Bad_bin_NumberOfOpenCreditLinesAndLoans  Margin Good_bin_NumberOfOpenCreditLinesAndLoans  woe_bin_NumberOfOpenCreditLinesAndLoans  bad_bin_NumberRealEstateLoansOrLines  all_bin_NumberRealEstateLoansOrLines  good_bin_NumberRealEstateLoansOrLines  Margin Bad_bin_NumberRealEstateLoansOrLines  Margin Good_bin_NumberRealEstateLoansOrLines  woe_bin_NumberRealEstateLoansOrLines\n",
       "0                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          1219                                          4598                                           3379                                           0.121584                                             0.024140                                         1.797837                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               2974              30109            
   27135                  0.296629                   0.193857           0.928274                   1387                  29993                   28606                      0.138340                       0.204367               0.516960                                     1846                                    27684                                     25838                                        0.184121                                         0.184591                                 0.691873                                   419                                  3652                                   3233                                     0.041791                                      0.023097                              1.032961\n",
       "1                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "2                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          2107                                         30000                                          27893                                           0.210154                                             0.199273                                         0.720083               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "3                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           877                                         30000                                          29123                                           0.087473                                             0.208060                                         0.350952               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     1573                                    26545                                     24972                                        0.156892                                         0.178405                                 0.630962                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "4                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   1387                  29993                   28606                      0.138340                       0.204367               0.516960                                     2017                                    37162                                     35145                                        0.201177                                         0.251082                                 0.588475                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...          ...          ...           ...                 ...                  ...          ...                         ...                         ...                          ...                                ...                                 ...                         ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                              ...                              ...                               ...                                     ...                                      ...                              ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                ...                ...            
     ...                       ...                        ...                ...                    ...                    ...                     ...                           ...                            ...                    ...                                      ...                                      ...                                       ...                                             ...                                              ...                                      ...                                   ...                                   ...                                    ...                                          ...                                           ...                                   ...\n",
       "149995                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]          398        17636         17238            0.039697             0.123151     0.279404                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           497                                         30000                                          29503                                           0.049571                                             0.210775                                         0.211221               1716              30000            
   28284                  0.171155                   0.202066           0.613576                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149996                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          2107                                         30000                                          27893                                           0.210154                                             0.199273                                         0.720083               2974              30109            
   27135                  0.296629                   0.193857           0.928274                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149997                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]         2149        34806         32657            0.214343             0.233308     0.651655                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           877                                         30000                                          29123                                           0.087473                                             0.208060                                         0.350952               1653              29891            
   28238                  0.164871                   0.201737           0.597328                   4031                  59757                   55726                      0.402055                       0.398117               0.698081                                     1846                                    27684                                     25838                                        0.184121                                         0.184591                                 0.691873                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149998                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           582                                         30000                                          29418                                           0.058049                                             0.210168                                         0.243890               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "149999                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]          952        27424         26472            0.094953             0.189121     0.406848                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1716              30000            
   28284                  0.171155                   0.202066           0.613576                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     2017                                    37162                                     35145                                        0.201177                                         0.251082                                 0.588475                                  1765                                 31522                                  29757                                     0.176042                                      0.212589                              0.603269\n",
       "\n",
       "[150000 rows x 81 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算这些特征的woe# 计算这些特征 \n",
    "def cal_WOE(df, features, target):\n",
    "    df_new = df.copy()\n",
    "    for f in features:\n",
    "        df_woe = df_new.groupby(f).agg({target:['sum', 'count']})\n",
    "        df_woe.columns = list(map(''.join, df_woe.columns.values))\n",
    "        df_woe = df_woe.reset_index()#重新再编码一下\n",
    "        df_woe = df_woe.rename(columns={target+'sum':'bad', target+'count':'all'})\n",
    "        # 这里是下面是计算WOE和IV的过程\n",
    "        df_woe['good'] = df_woe['all'] - df_woe['bad']\n",
    "        df_woe['Margin Bad'] = df_woe['bad'] / df_woe['bad'].sum() \n",
    "        df_woe['Margin Good'] = df_woe['good'] / df_woe['good'].sum()\n",
    "        df_woe['woe'] = np.log1p(df_woe['Margin Bad'] / df_woe['Margin Good'])\n",
    "        # 避免重名  df_woe每次循环一个特征都有一个bad和all  按照名字下划线添加到df_woe里去 \n",
    "        df_woe.columns = [c if c==f else c+'_'+f for c in list(df_woe.columns.values)]#先更名名称 再按照特赠那个字段左连接\n",
    "        df_new = df_new.merge(df_woe, on=f, how='left')\n",
    "    return df_new\n",
    "# 计算这些特征的WOE\n",
    "df_woe = cal_WOE(df_train, bin_cols, 'SeriousDlqin2yrs')\n",
    "df_woe# bin_cols里面有10个特征  每个特征都增加了 bad all good MarginBad MarginGood woe  所以要增加60个字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'NumberOfTimes90DaysLate',\n",
       " 'RevolvingUtilizationOfUnsecuredLines',\n",
       " 'age']"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#只筛选那5个我们要的规则的WOE\n",
    "feature_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>features</th>\n",
       "      <th>bin</th>\n",
       "      <th>woe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>1.797837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.572521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.151185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>2.429111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>2.520613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.774776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.902860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1052</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>2.812612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6909</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.024184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10822</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>2.077007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.645352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.712133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.159234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1146</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.955438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.886833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2406</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.164917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6664</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.758483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16642</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.915139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23964</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.705454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68976</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.608707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.998746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.701853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1298</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.224503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1713</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.379582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.878935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2910</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>3.691154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3400</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.088387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3929</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>4.140397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5684</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>3.580814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>1.495914</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.720083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.350952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>0.243890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>0.211221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>age</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>0.813822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>age</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>0.955231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>age</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>0.279404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>age</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>0.651655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>age</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>0.406848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>age</td>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>1.013134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   features               bin       woe\n",
       "0      NumberOfTime30-59DaysPastDueNotWorse        (1.0, 2.0]  1.797837\n",
       "1      NumberOfTime30-59DaysPastDueNotWorse       (-inf, 1.0]  0.572521\n",
       "13     NumberOfTime30-59DaysPastDueNotWorse        (2.0, 3.0]  2.151185\n",
       "183    NumberOfTime30-59DaysPastDueNotWorse        (3.0, 4.0]  2.429111\n",
       "191    NumberOfTime30-59DaysPastDueNotWorse        (4.0, 5.0]  2.520613\n",
       "251    NumberOfTime30-59DaysPastDueNotWorse        (6.0, 7.0]  2.774776\n",
       "423    NumberOfTime30-59DaysPastDueNotWorse        (9.0, inf]  2.902860\n",
       "1052   NumberOfTime30-59DaysPastDueNotWorse        (5.0, 6.0]  2.812612\n",
       "6909   NumberOfTime30-59DaysPastDueNotWorse        (7.0, 8.0]  2.024184\n",
       "10822  NumberOfTime30-59DaysPastDueNotWorse        (8.0, 9.0]  2.077007\n",
       "0      NumberOfTime60-89DaysPastDueNotWorse       (-inf, 1.0]  0.645352\n",
       "186    NumberOfTime60-89DaysPastDueNotWorse        (1.0, 2.0]  2.712133\n",
       "423    NumberOfTime60-89DaysPastDueNotWorse        (4.0, 5.0]  3.159234\n",
       "1146   NumberOfTime60-89DaysPastDueNotWorse        (2.0, 3.0]  2.955438\n",
       "1733   NumberOfTime60-89DaysPastDueNotWorse        (9.0, inf]  2.886833\n",
       "2406   NumberOfTime60-89DaysPastDueNotWorse        (3.0, 4.0]  3.164917\n",
       "6664   NumberOfTime60-89DaysPastDueNotWorse        (5.0, 6.0]  3.758483\n",
       "16642  NumberOfTime60-89DaysPastDueNotWorse        (6.0, 7.0]  2.915139\n",
       "23964  NumberOfTime60-89DaysPastDueNotWorse        (7.0, 8.0]  2.705454\n",
       "68976  NumberOfTime60-89DaysPastDueNotWorse        (8.0, 9.0]  0.000000\n",
       "0                   NumberOfTimes90DaysLate       (-inf, 1.0]  0.608707\n",
       "13                  NumberOfTimes90DaysLate        (2.0, 3.0]  2.998746\n",
       "186                 NumberOfTimes90DaysLate        (1.0, 2.0]  2.701853\n",
       "1298                NumberOfTimes90DaysLate        (4.0, 5.0]  3.224503\n",
       "1713                NumberOfTimes90DaysLate        (3.0, 4.0]  3.379582\n",
       "1733                NumberOfTimes90DaysLate        (9.0, inf]  2.878935\n",
       "2910                NumberOfTimes90DaysLate        (8.0, 9.0]  3.691154\n",
       "3400                NumberOfTimes90DaysLate        (5.0, 6.0]  3.088387\n",
       "3929                NumberOfTimes90DaysLate        (6.0, 7.0]  4.140397\n",
       "5684                NumberOfTimes90DaysLate        (7.0, 8.0]  3.580814\n",
       "0      RevolvingUtilizationOfUnsecuredLines  (0.699, 50708.0]  1.495914\n",
       "2      RevolvingUtilizationOfUnsecuredLines    (0.271, 0.699]  0.720083\n",
       "3      RevolvingUtilizationOfUnsecuredLines   (0.0832, 0.271]  0.350952\n",
       "11     RevolvingUtilizationOfUnsecuredLines  (-0.001, 0.0192]  0.243890\n",
       "14     RevolvingUtilizationOfUnsecuredLines  (0.0192, 0.0832]  0.211221\n",
       "0                                       age      (40.0, 50.0]  0.813822\n",
       "1                                       age      (25.0, 40.0]  0.955231\n",
       "5                                       age       (70.0, inf]  0.279404\n",
       "6                                       age      (50.0, 60.0]  0.651655\n",
       "15                                      age      (60.0, 70.0]  0.406848\n",
       "19                                      age      (-inf, 25.0]  1.013134"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 得到WOE规则 feature, bin, woe\n",
    "df_bin_to_woe = pd.DataFrame(columns=['features', 'bin', 'woe'])\n",
    "for f in feature_cols:\n",
    "    b = 'bin_' + f\n",
    "    w = 'woe_bin_' + f\n",
    "    df = df_woe[[w, b]].drop_duplicates()#通过bin和woe_bin来获取到指定列的元素，drop_duplicates()可以去重 这里按照woe的数值去重\n",
    "    df.columns = ['woe', 'bin']#起个索引的名称\n",
    "    df['features'] = f#再加上一个字段\n",
    "    df_bin_to_woe = pd.concat([df_bin_to_woe, df])#然后再进行拼接\n",
    "df_bin_to_woe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 6开始用逻辑回归来进行建模"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.1筛选特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['woe_bin_age',\n",
       " 'woe_bin_NumberOfDependents',\n",
       " 'woe_bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTimes90DaysLate',\n",
       " 'woe_bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'woe_bin_DebtRatio',\n",
       " 'woe_bin_MonthlyIncome',\n",
       " 'woe_bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'woe_bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "woe_cols=[c for c in list(df_woe.columns.values) if 'woe' in c ]\n",
    "print(len(woe_cols))#上节课这部分这里选取了全部特征的的WOE的值 其实这里没有必要 只选取IV>0.1就行了\n",
    "woe_cols#这里只选特征里面带有WOE的值就行建模  就是上面算出来的那个10个WOE值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['woe_bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTimes90DaysLate',\n",
       " 'woe_bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'woe_bin_age']"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "woe_cols=['woe_bin_'+c for c in feature_cols]#给上面的feature_cols前面加上woe_bin\n",
    "print(len(woe_cols))\n",
    "woe_cols#这里只选特征里面带有WOE的值就行建模  就是上面算出来的那个10个WOE值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.2数据集切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(120000, 5)\n",
      "(30000, 5)\n",
      "(120000,)\n",
      "(30000,)\n"
     ]
    }
   ],
   "source": [
    "#数据集切分\n",
    "from sklearn.model_selection import train_test_split\n",
    "x_train,x_test,y_train,y_test=train_test_split(df_woe[woe_cols],df_woe['SeriousDlqin2yrs'],test_size=0.2,random_state=33)\n",
    "print(x_train.shape)\n",
    "print(x_test.shape)\n",
    "print(y_train.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    120000.000000\n",
       "mean          0.066867\n",
       "std           0.249792\n",
       "min           0.000000\n",
       "25%           0.000000\n",
       "50%           0.000000\n",
       "75%           0.000000\n",
       "max           1.000000\n",
       "Name: SeriousDlqin2yrs, dtype: float64"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.3模型训练与评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9364333333333333\n",
      "0.7787691092695656\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score,roc_auc_score\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "#建好逻辑回归的模型\n",
    "model=LogisticRegression(random_state=2021)\n",
    "model.fit(x_train ,y_train )#注意这个地方报了一下错误 nan toolarge  是因为有个地方出现了inf和NAN  解决的方法是上面np.log变成np.log1p\n",
    "y_pred=model.predict(x_test)\n",
    "print(accuracy_score(y_pred,y_test))#违约比列是6%  前面探索过 target那一列的数据分布   这种情况下准确率93%并不高\n",
    "print(roc_auc_score(y_pred,y_test))#Auc的值有点低  一般大于0.8才可以上线"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7做评分卡模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "B: 72.13475204444818\n",
      "A: 650.0\n"
     ]
    }
   ],
   "source": [
    "#我们这里    假设PDO=50  根据公式算出来B=7.13    再假设odds=1的时候 P0的分数是650  所以A就是650\n",
    "pdo = 50\n",
    "odds = 1\n",
    "B = pdo / math.log(2)\n",
    "print(\"B:\",B)\n",
    "A = 650 + B*math.log(1)\n",
    "print(\"A:\",A)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>features</th>\n",
       "      <th>bin</th>\n",
       "      <th>woe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>1.797837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.572521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.151185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>2.429111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>2.520613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.774776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.902860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1052</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>2.812612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6909</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.024184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10822</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>2.077007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.645352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.712133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.159234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1146</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.955438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.886833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2406</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.164917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6664</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.758483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16642</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.915139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23964</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.705454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68976</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.608707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.998746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.701853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1298</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.224503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1713</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.379582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.878935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2910</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>3.691154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3400</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.088387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3929</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>4.140397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5684</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>3.580814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>1.495914</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.720083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.350952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>0.243890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>0.211221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>age</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>0.813822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>age</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>0.955231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>age</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>0.279404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>age</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>0.651655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>age</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>0.406848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>age</td>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>1.013134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   features               bin       woe\n",
       "0      NumberOfTime30-59DaysPastDueNotWorse        (1.0, 2.0]  1.797837\n",
       "1      NumberOfTime30-59DaysPastDueNotWorse       (-inf, 1.0]  0.572521\n",
       "13     NumberOfTime30-59DaysPastDueNotWorse        (2.0, 3.0]  2.151185\n",
       "183    NumberOfTime30-59DaysPastDueNotWorse        (3.0, 4.0]  2.429111\n",
       "191    NumberOfTime30-59DaysPastDueNotWorse        (4.0, 5.0]  2.520613\n",
       "251    NumberOfTime30-59DaysPastDueNotWorse        (6.0, 7.0]  2.774776\n",
       "423    NumberOfTime30-59DaysPastDueNotWorse        (9.0, inf]  2.902860\n",
       "1052   NumberOfTime30-59DaysPastDueNotWorse        (5.0, 6.0]  2.812612\n",
       "6909   NumberOfTime30-59DaysPastDueNotWorse        (7.0, 8.0]  2.024184\n",
       "10822  NumberOfTime30-59DaysPastDueNotWorse        (8.0, 9.0]  2.077007\n",
       "0      NumberOfTime60-89DaysPastDueNotWorse       (-inf, 1.0]  0.645352\n",
       "186    NumberOfTime60-89DaysPastDueNotWorse        (1.0, 2.0]  2.712133\n",
       "423    NumberOfTime60-89DaysPastDueNotWorse        (4.0, 5.0]  3.159234\n",
       "1146   NumberOfTime60-89DaysPastDueNotWorse        (2.0, 3.0]  2.955438\n",
       "1733   NumberOfTime60-89DaysPastDueNotWorse        (9.0, inf]  2.886833\n",
       "2406   NumberOfTime60-89DaysPastDueNotWorse        (3.0, 4.0]  3.164917\n",
       "6664   NumberOfTime60-89DaysPastDueNotWorse        (5.0, 6.0]  3.758483\n",
       "16642  NumberOfTime60-89DaysPastDueNotWorse        (6.0, 7.0]  2.915139\n",
       "23964  NumberOfTime60-89DaysPastDueNotWorse        (7.0, 8.0]  2.705454\n",
       "68976  NumberOfTime60-89DaysPastDueNotWorse        (8.0, 9.0]  0.000000\n",
       "0                   NumberOfTimes90DaysLate       (-inf, 1.0]  0.608707\n",
       "13                  NumberOfTimes90DaysLate        (2.0, 3.0]  2.998746\n",
       "186                 NumberOfTimes90DaysLate        (1.0, 2.0]  2.701853\n",
       "1298                NumberOfTimes90DaysLate        (4.0, 5.0]  3.224503\n",
       "1713                NumberOfTimes90DaysLate        (3.0, 4.0]  3.379582\n",
       "1733                NumberOfTimes90DaysLate        (9.0, inf]  2.878935\n",
       "2910                NumberOfTimes90DaysLate        (8.0, 9.0]  3.691154\n",
       "3400                NumberOfTimes90DaysLate        (5.0, 6.0]  3.088387\n",
       "3929                NumberOfTimes90DaysLate        (6.0, 7.0]  4.140397\n",
       "5684                NumberOfTimes90DaysLate        (7.0, 8.0]  3.580814\n",
       "0      RevolvingUtilizationOfUnsecuredLines  (0.699, 50708.0]  1.495914\n",
       "2      RevolvingUtilizationOfUnsecuredLines    (0.271, 0.699]  0.720083\n",
       "3      RevolvingUtilizationOfUnsecuredLines   (0.0832, 0.271]  0.350952\n",
       "11     RevolvingUtilizationOfUnsecuredLines  (-0.001, 0.0192]  0.243890\n",
       "14     RevolvingUtilizationOfUnsecuredLines  (0.0192, 0.0832]  0.211221\n",
       "0                                       age      (40.0, 50.0]  0.813822\n",
       "1                                       age      (25.0, 40.0]  0.955231\n",
       "5                                       age       (70.0, inf]  0.279404\n",
       "6                                       age      (50.0, 60.0]  0.651655\n",
       "15                                      age      (60.0, 70.0]  0.406848\n",
       "19                                      age      (-inf, 25.0]  1.013134"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_bin_to_woe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.87540308 0.45306013 0.72714541 1.4410486  1.10219085]\n",
      "NumberOfTime30-59DaysPastDueNotWorse\n",
      "NumberOfTime60-89DaysPastDueNotWorse\n",
      "NumberOfTimes90DaysLate\n",
      "RevolvingUtilizationOfUnsecuredLines\n",
      "age\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Variable</th>\n",
       "      <th>Binning</th>\n",
       "      <th>Score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>-131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>-194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>-155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>-75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>-36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>age</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>-65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>age</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>-76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>age</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>age</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>-52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>age</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>-32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>age</td>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>-81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                Variable           Binning  Score\n",
       "0   NumberOfTime30-59DaysPastDueNotWorse        (1.0, 2.0]   -114\n",
       "1   NumberOfTime30-59DaysPastDueNotWorse       (-inf, 1.0]    -36\n",
       "2   NumberOfTime30-59DaysPastDueNotWorse        (2.0, 3.0]   -136\n",
       "3   NumberOfTime30-59DaysPastDueNotWorse        (3.0, 4.0]   -153\n",
       "4   NumberOfTime30-59DaysPastDueNotWorse        (4.0, 5.0]   -159\n",
       "5   NumberOfTime30-59DaysPastDueNotWorse        (6.0, 7.0]   -175\n",
       "6   NumberOfTime30-59DaysPastDueNotWorse        (9.0, inf]   -183\n",
       "7   NumberOfTime30-59DaysPastDueNotWorse        (5.0, 6.0]   -178\n",
       "8   NumberOfTime30-59DaysPastDueNotWorse        (7.0, 8.0]   -128\n",
       "9   NumberOfTime30-59DaysPastDueNotWorse        (8.0, 9.0]   -131\n",
       "10  NumberOfTime60-89DaysPastDueNotWorse       (-inf, 1.0]    -21\n",
       "11  NumberOfTime60-89DaysPastDueNotWorse        (1.0, 2.0]    -89\n",
       "12  NumberOfTime60-89DaysPastDueNotWorse        (4.0, 5.0]   -103\n",
       "13  NumberOfTime60-89DaysPastDueNotWorse        (2.0, 3.0]    -97\n",
       "14  NumberOfTime60-89DaysPastDueNotWorse        (9.0, inf]    -94\n",
       "15  NumberOfTime60-89DaysPastDueNotWorse        (3.0, 4.0]   -103\n",
       "16  NumberOfTime60-89DaysPastDueNotWorse        (5.0, 6.0]   -123\n",
       "17  NumberOfTime60-89DaysPastDueNotWorse        (6.0, 7.0]    -95\n",
       "18  NumberOfTime60-89DaysPastDueNotWorse        (7.0, 8.0]    -88\n",
       "19  NumberOfTime60-89DaysPastDueNotWorse        (8.0, 9.0]      0\n",
       "20               NumberOfTimes90DaysLate       (-inf, 1.0]    -32\n",
       "21               NumberOfTimes90DaysLate        (2.0, 3.0]   -157\n",
       "22               NumberOfTimes90DaysLate        (1.0, 2.0]   -142\n",
       "23               NumberOfTimes90DaysLate        (4.0, 5.0]   -169\n",
       "24               NumberOfTimes90DaysLate        (3.0, 4.0]   -177\n",
       "25               NumberOfTimes90DaysLate        (9.0, inf]   -151\n",
       "26               NumberOfTimes90DaysLate        (8.0, 9.0]   -194\n",
       "27               NumberOfTimes90DaysLate        (5.0, 6.0]   -162\n",
       "28               NumberOfTimes90DaysLate        (6.0, 7.0]   -217\n",
       "29               NumberOfTimes90DaysLate        (7.0, 8.0]   -188\n",
       "30  RevolvingUtilizationOfUnsecuredLines  (0.699, 50708.0]   -155\n",
       "31  RevolvingUtilizationOfUnsecuredLines    (0.271, 0.699]    -75\n",
       "32  RevolvingUtilizationOfUnsecuredLines   (0.0832, 0.271]    -36\n",
       "33  RevolvingUtilizationOfUnsecuredLines  (-0.001, 0.0192]    -25\n",
       "34  RevolvingUtilizationOfUnsecuredLines  (0.0192, 0.0832]    -22\n",
       "35                                   age      (40.0, 50.0]    -65\n",
       "36                                   age      (25.0, 40.0]    -76\n",
       "37                                   age       (70.0, inf]    -22\n",
       "38                                   age      (50.0, 60.0]    -52\n",
       "39                                   age      (60.0, 70.0]    -32\n",
       "40                                   age      (-inf, 25.0]    -81"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#写一个函数 生成一个评分卡模型\n",
    "def generate_scorecard(model_coef, binning_df, features, B):    \n",
    "    lst = []\n",
    "    cols = ['Variable', 'Binning', 'Score']\n",
    "    coef = model_coef[0]#模型系数 就是逻辑回归那权重系数\n",
    "    print(coef)\n",
    "    for i in range(len(features)):#这里的feature一共有5个\n",
    "        f = features[i]#把每个feature打印出来\n",
    "        print(f)        \n",
    "        #得到这个feature的WOE规则 这里是把上面已经计算好的df_bin_to_woe那个表 赋值给这里的binning_df 然后通过特征就拿到了WOE规则\n",
    "        df = binning_df[binning_df['features'] == f]\n",
    "        #print(df)\n",
    "        for index, row in df.iterrows():\n",
    "            #这里是按照公式去计算评分卡的分数\n",
    "            score = int(round(-coef[i] * row['woe'] * B)) #coef前面是- 因为需要用基准分减去这个值 round四舍五入 int保留整数部分\n",
    "            lst.append([f,row['bin'], score])\n",
    "    data = pd.DataFrame(lst, columns= cols)\n",
    "    return data\n",
    "score_card = generate_scorecard(model.coef_, df_bin_to_woe, feature_cols, B)\n",
    "score_card#这样就得到了评分卡的规则的表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Variable</th>\n",
       "      <th>Binning</th>\n",
       "      <th>Score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Variable</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>1</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>-131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>19</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">NumberOfTimes90DaysLate</th>\n",
       "      <th>20</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>-32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>-142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>-151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>-157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>-162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>-169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>-177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>-188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>-194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>-217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>34</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>-36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>-75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>-155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">age</th>\n",
       "      <th>37</th>\n",
       "      <td>age</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>age</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>-32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>age</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>-52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>age</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>-65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>age</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>-76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>age</td>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>-81</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                     Variable           Binning  Score\n",
       "Variable                                                                                              \n",
       "NumberOfTime30-59DaysPastDueNotWorse 1   NumberOfTime30-59DaysPastDueNotWorse       (-inf, 1.0]    -36\n",
       "                                     0   NumberOfTime30-59DaysPastDueNotWorse        (1.0, 2.0]   -114\n",
       "                                     8   NumberOfTime30-59DaysPastDueNotWorse        (7.0, 8.0]   -128\n",
       "                                     9   NumberOfTime30-59DaysPastDueNotWorse        (8.0, 9.0]   -131\n",
       "                                     2   NumberOfTime30-59DaysPastDueNotWorse        (2.0, 3.0]   -136\n",
       "                                     3   NumberOfTime30-59DaysPastDueNotWorse        (3.0, 4.0]   -153\n",
       "                                     4   NumberOfTime30-59DaysPastDueNotWorse        (4.0, 5.0]   -159\n",
       "                                     5   NumberOfTime30-59DaysPastDueNotWorse        (6.0, 7.0]   -175\n",
       "                                     7   NumberOfTime30-59DaysPastDueNotWorse        (5.0, 6.0]   -178\n",
       "                                     6   NumberOfTime30-59DaysPastDueNotWorse        (9.0, inf]   -183\n",
       "NumberOfTime60-89DaysPastDueNotWorse 19  NumberOfTime60-89DaysPastDueNotWorse        (8.0, 9.0]      0\n",
       "                                     10  NumberOfTime60-89DaysPastDueNotWorse       (-inf, 1.0]    -21\n",
       "                                     18  NumberOfTime60-89DaysPastDueNotWorse        (7.0, 8.0]    -88\n",
       "                                     11  NumberOfTime60-89DaysPastDueNotWorse        (1.0, 2.0]    -89\n",
       "                                     14  NumberOfTime60-89DaysPastDueNotWorse        (9.0, inf]    -94\n",
       "                                     17  NumberOfTime60-89DaysPastDueNotWorse        (6.0, 7.0]    -95\n",
       "                                     13  NumberOfTime60-89DaysPastDueNotWorse        (2.0, 3.0]    -97\n",
       "                                     12  NumberOfTime60-89DaysPastDueNotWorse        (4.0, 5.0]   -103\n",
       "                                     15  NumberOfTime60-89DaysPastDueNotWorse        (3.0, 4.0]   -103\n",
       "                                     16  NumberOfTime60-89DaysPastDueNotWorse        (5.0, 6.0]   -123\n",
       "NumberOfTimes90DaysLate              20               NumberOfTimes90DaysLate       (-inf, 1.0]    -32\n",
       "                                     22               NumberOfTimes90DaysLate        (1.0, 2.0]   -142\n",
       "                                     25               NumberOfTimes90DaysLate        (9.0, inf]   -151\n",
       "                                     21               NumberOfTimes90DaysLate        (2.0, 3.0]   -157\n",
       "                                     27               NumberOfTimes90DaysLate        (5.0, 6.0]   -162\n",
       "                                     23               NumberOfTimes90DaysLate        (4.0, 5.0]   -169\n",
       "                                     24               NumberOfTimes90DaysLate        (3.0, 4.0]   -177\n",
       "                                     29               NumberOfTimes90DaysLate        (7.0, 8.0]   -188\n",
       "                                     26               NumberOfTimes90DaysLate        (8.0, 9.0]   -194\n",
       "                                     28               NumberOfTimes90DaysLate        (6.0, 7.0]   -217\n",
       "RevolvingUtilizationOfUnsecuredLines 34  RevolvingUtilizationOfUnsecuredLines  (0.0192, 0.0832]    -22\n",
       "                                     33  RevolvingUtilizationOfUnsecuredLines  (-0.001, 0.0192]    -25\n",
       "                                     32  RevolvingUtilizationOfUnsecuredLines   (0.0832, 0.271]    -36\n",
       "                                     31  RevolvingUtilizationOfUnsecuredLines    (0.271, 0.699]    -75\n",
       "                                     30  RevolvingUtilizationOfUnsecuredLines  (0.699, 50708.0]   -155\n",
       "age                                  37                                   age       (70.0, inf]    -22\n",
       "                                     39                                   age      (60.0, 70.0]    -32\n",
       "                                     38                                   age      (50.0, 60.0]    -52\n",
       "                                     35                                   age      (40.0, 50.0]    -65\n",
       "                                     36                                   age      (25.0, 40.0]    -76\n",
       "                                     40                                   age      (-inf, 25.0]    -81"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Group the score card by variable, then order each group's rows from highest to lowest score.\n",
    "sorted_score_card = score_card.groupby('Variable').apply(\n",
    "    lambda group: group.sort_values(by='Score', ascending=False)\n",
    ")\n",
    "sorted_score_card"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "#中间有一些 inf 转成字符类型\n",
    "def str_to_int(s):\n",
    "    \"\"\"这个函数作用：将字符转为数值, 主要是为了转换无穷大小的值\"\"\"\n",
    "    if s == '-inf':\n",
    "        return -999999\n",
    "    if s == 'inf':\n",
    "        return 999999\n",
    "    return float(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map a raw feature value to the score-card bin that contains it.\n",
    "def map_value_to_bin(feature_value, feature_to_bin):\n",
    "    \"\"\"Return the 'Binning' entry whose interval contains feature_value.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    feature_value : scalar\n",
    "        Raw value of one feature.\n",
    "    feature_to_bin : DataFrame\n",
    "        Score-card rows for that feature. Each row's 'Binning' entry is rendered\n",
    "        with str() and parsed as interval text such as '(1.0, 2.0]'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The matching row's 'Binning' entry, or None when no interval contains\n",
    "    the value.\n",
    "    \"\"\"\n",
    "    for _, row in feature_to_bin.iterrows():\n",
    "        interval_text = str(row['Binning'])\n",
    "        # '(' and ')' mark an open (exclusive) end; any other bracket is treated as closed.\n",
    "        left_open = interval_text[0] == '('\n",
    "        right_open = interval_text[-1] == ')'\n",
    "        edges = interval_text[1:-1].split(',')\n",
    "        lower = str_to_int(edges[0])\n",
    "        # The upper edge keeps its leading space (e.g. ' 2.0'); float() accepts that.\n",
    "        upper = str_to_int(edges[1])\n",
    "        # Phrased as 'falls outside' tests: the value is in the bin when neither holds.\n",
    "        below_lower = feature_value <= lower if left_open else feature_value < lower\n",
    "        above_upper = feature_value >= upper if right_open else feature_value > upper\n",
    "        if not (below_lower or above_upper):\n",
    "            return row['Binning']\n",
    "    # No interval contains the value.\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# score_card is the score-card rule table built in the cells above.\n",
    "def map_to_score(df, score_card):\n",
    "    \"\"\"Sum the score-card points for one record.\n",
    "\n",
    "    df is a single row (this function is applied with axis=1), indexed by feature\n",
    "    name. For every distinct 'Variable' in score_card, the row's value is mapped\n",
    "    to its bin and that bin's 'Score' is added to the running total.\n",
    "    \"\"\"\n",
    "    total = 0\n",
    "    for variable in score_card['Variable'].unique():\n",
    "        # Score-card rows that describe this variable's bins.\n",
    "        variable_bins = score_card.loc[score_card['Variable'] == variable]\n",
    "        matched_bin = map_value_to_bin(df[variable], variable_bins)\n",
    "        # Pick the rule row for the matched bin and add its points.\n",
    "        matched_rows = variable_bins.loc[variable_bins['Binning'] == matched_bin]\n",
    "        total += matched_rows['Score'].iloc[0]\n",
    "    return total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score every record with the score card.\n",
    "def calculate_score_with_card(df, score_card, A):\n",
    "    \"\"\"Add an integer 'score' column to df and return df.\n",
    "\n",
    "    Each row's score-card points are computed with map_to_score, the base\n",
    "    score A is added, and the result is cast to int. df is modified in place.\n",
    "    \"\"\"\n",
    "    card_points = df.apply(map_to_score, axis=1, args=(score_card,))\n",
    "    df['score'] = (card_points + A).astype(int)  # base score plus card points\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 8评分卡效果测试预览"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>126281</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.421074</td>\n",
       "      <td>42</td>\n",
       "      <td>421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122578</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.135995</td>\n",
       "      <td>45</td>\n",
       "      <td>460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136996</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.166946</td>\n",
       "      <td>71</td>\n",
       "      <td>503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70155</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.517720</td>\n",
       "      <td>50</td>\n",
       "      <td>421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96713</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.089805</td>\n",
       "      <td>28</td>\n",
       "      <td>449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127770</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.300350</td>\n",
       "      <td>57</td>\n",
       "      <td>434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128728</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.252509</td>\n",
       "      <td>35</td>\n",
       "      <td>371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116986</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.278211</td>\n",
       "      <td>64</td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21065</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.028903</td>\n",
       "      <td>43</td>\n",
       "      <td>474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118914</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.146637</td>\n",
       "      <td>57</td>\n",
       "      <td>473</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfTime30-59DaysPastDueNotWorse  NumberOfTime60-89DaysPastDueNotWorse  NumberOfTimes90DaysLate  RevolvingUtilizationOfUnsecuredLines  age  score\n",
       "126281                                     0                                     0                        0                              0.421074   42    421\n",
       "122578                                     0                                     0                        0                              0.135995   45    460\n",
       "136996                                     0                                     0                        0                              0.166946   71    503\n",
       "70155                                      1                                     0                        0                              0.517720   50    421\n",
       "96713                                      0                                     0                        0                              0.089805   28    449\n",
       "127770                                     1                                     0                        0                              0.300350   57    434\n",
       "128728                                     2                                     0                        0                              0.252509   35    371\n",
       "116986                                     1                                     0                        0                              0.278211   64    454\n",
       "21065                                      0                                     0                        0                              0.028903   43    474\n",
       "118914                                     0                                     0                        0                              0.146637   57    473"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 10 random well-performing customers (SeriousDlqin2yrs == 0) and look at their scores.\n",
    "good_sample = df_train[df_train['SeriousDlqin2yrs'] == 0].sample(10)[feature_cols]\n",
    "calculate_score_with_card(good_sample, score_card, A)  # apply the score card to compute scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17910</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106671</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.070404</td>\n",
       "      <td>64</td>\n",
       "      <td>507</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137642</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.689243</td>\n",
       "      <td>40</td>\n",
       "      <td>410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75710</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.585918</td>\n",
       "      <td>54</td>\n",
       "      <td>434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10262</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.051134</td>\n",
       "      <td>48</td>\n",
       "      <td>474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106752</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.817553</td>\n",
       "      <td>39</td>\n",
       "      <td>252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48495</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.873861</td>\n",
       "      <td>55</td>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30801</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.314437</td>\n",
       "      <td>36</td>\n",
       "      <td>410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80743</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.778912</td>\n",
       "      <td>56</td>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32536</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.641698</td>\n",
       "      <td>54</td>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfTime30-59DaysPastDueNotWorse  NumberOfTime60-89DaysPastDueNotWorse  NumberOfTimes90DaysLate  RevolvingUtilizationOfUnsecuredLines  age  score\n",
       "17910                                      0                                     1                        3                              0.000000   30    335\n",
       "106671                                     1                                     0                        0                              0.070404   64    507\n",
       "137642                                     0                                     0                        0                              0.689243   40    410\n",
       "75710                                      0                                     0                        0                              0.585918   54    434\n",
       "10262                                      0                                     0                        0                              0.051134   48    474\n",
       "106752                                     2                                     0                        0                              1.817553   39    252\n",
       "48495                                      0                                     0                        0                              0.873861   55    354\n",
       "30801                                      0                                     0                        0                              0.314437   36    410\n",
       "80743                                      0                                     0                        0                              0.778912   56    354\n",
       "32536                                      1                                     1                        0                              1.641698   54    354"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Likewise, draw 10 random poorly-performing customers (SeriousDlqin2yrs == 1).\n",
    "# Compared with the previous cell, good customers generally score higher and bad ones lower.\n",
    "bad_sample = df_train[df_train['SeriousDlqin2yrs'] == 1].sample(10)[feature_cols]\n",
    "calculate_score_with_card(bad_sample, score_card, A)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
